Re: [PATCH V2 1/1] perf/x86: Add Intel power cstate PMUs support

From: Peter Zijlstra
Date: Thu Aug 06 2015 - 11:44:33 EST


On Mon, Jul 27, 2015 at 04:46:16AM -0400, kan.liang@xxxxxxxxx wrote:

As a general comment; this thing is unreadable. Far too much macro foo
to instantiate the different PMUs.

> +struct perf_power_cstate_event_msr {
> + int id;
> + u64 msr;
> +};
> +
> +enum perf_power_cstate_id {
> + /*
> + * power_cstate events, generalized by the kernel:
> + */
> + PERF_POWER_CORE_C1_RES = 0,
> + PERF_POWER_CORE_C3_RES,
> + PERF_POWER_CORE_C6_RES,
> + PERF_POWER_CORE_C7_RES,

These are two different PMUs, why are they in the same enum space?

> + PERF_POWER_PKG_C2_RES,
> + PERF_POWER_PKG_C3_RES,
> + PERF_POWER_PKG_C6_RES,
> + PERF_POWER_PKG_C7_RES,
> + PERF_POWER_PKG_C8_RES,
> + PERF_POWER_PKG_C9_RES,
> + PERF_POWER_PKG_C10_RES,
> +
> + PERF_POWER_CSTATE_EVENT_MAX, /* non-ABI */
> +};
> +
> +struct power_cstate_pmu {
> + struct intel_power_cstate_type *power_cstate_type;
> + struct pmu *pmu;
> +};
> +
> +
> +static struct intel_power_cstate_type *empty_power_cstate[] = { NULL, };
> +struct intel_power_cstate_type **power_cstate = empty_power_cstate;
> +
> +static struct perf_power_cstate_event_msr power_cstate_events[] = {
> + { PERF_POWER_CORE_C1_RES, MSR_CORE_C1_RES },
> + { PERF_POWER_CORE_C3_RES, MSR_CORE_C3_RESIDENCY },
> + { PERF_POWER_CORE_C6_RES, MSR_CORE_C6_RESIDENCY },
> + { PERF_POWER_CORE_C7_RES, MSR_CORE_C7_RESIDENCY },
> + { PERF_POWER_PKG_C2_RES, MSR_PKG_C2_RESIDENCY },
> + { PERF_POWER_PKG_C3_RES, MSR_PKG_C3_RESIDENCY },
> + { PERF_POWER_PKG_C6_RES, MSR_PKG_C6_RESIDENCY },
> + { PERF_POWER_PKG_C7_RES, MSR_PKG_C7_RESIDENCY },
> + { PERF_POWER_PKG_C8_RES, MSR_PKG_C8_RESIDENCY },
> + { PERF_POWER_PKG_C9_RES, MSR_PKG_C9_RESIDENCY },
> + { PERF_POWER_PKG_C10_RES, MSR_PKG_C10_RESIDENCY },
> +};

What's the point of the first entry? You already know which index it is,
no point in storing that again.

> +
> +EVENT_ATTR_STR(c1-residency, power_core_c1_res, "event=0x00");
> +EVENT_ATTR_STR(c3-residency, power_core_c3_res, "event=0x01");
> +EVENT_ATTR_STR(c6-residency, power_core_c6_res, "event=0x02");
> +EVENT_ATTR_STR(c7-residency, power_core_c7_res, "event=0x03");

> +EVENT_ATTR_STR(c2-residency, power_pkg_c2_res, "event=0x04");

I would very much expect this thing to start counting at 0 again.

> +EVENT_ATTR_STR(c3-residency, power_pkg_c3_res, "event=0x05");
> +EVENT_ATTR_STR(c6-residency, power_pkg_c6_res, "event=0x06");
> +EVENT_ATTR_STR(c7-residency, power_pkg_c7_res, "event=0x07");
> +EVENT_ATTR_STR(c8-residency, power_pkg_c8_res, "event=0x08");
> +EVENT_ATTR_STR(c9-residency, power_pkg_c9_res, "event=0x09");
> +EVENT_ATTR_STR(c10-residency, power_pkg_c10_res, "event=0x0a");
> +
> +static cpumask_t power_cstate_core_cpu_mask;

That one typically does not need a cpumask.

> +static cpumask_t power_cstate_pkg_cpu_mask;

> +static int power_cstate_pmu_event_init(struct perf_event *event)
> +{
> + u64 cfg = event->attr.config;
> + int ret = 0;
> +
> + if (event->attr.type != event->pmu->type)
> + return -ENOENT;
> +
> + /*
> + * check event is known (determines counter)
> + */
> + if (cfg >= PERF_POWER_CSTATE_EVENT_MAX)
> + return -EINVAL;
> +
> + /* unsupported modes and filters */
> + if (event->attr.exclude_user ||
> + event->attr.exclude_kernel ||
> + event->attr.exclude_hv ||
> + event->attr.exclude_idle ||
> + event->attr.exclude_host ||
> + event->attr.exclude_guest ||
> + event->attr.sample_period) /* no sampling */
> + return -EINVAL;

Same as with the MSR thing, but worse. What happens if we hand a config
value for the 'wrong' PMU type?

What happens if we hand a pkg c10 config to one that doesn't have c10?

> + /* must be done before validate_group */
> + event->hw.event_base = power_cstate_events[cfg].msr;
> + event->hw.config = cfg;
> + event->hw.idx = -1;
> +
> + return ret;
> +}
> +
> +static inline u64 power_cstate_pmu_read_counter(struct perf_event *event)
> +{
> + u64 val;
> +
> + rdmsrl_safe(event->hw.event_base, &val);

No, at this point you had better know if that MSR exists or not. If it
does not the event should not exist.

> + return val;
> +}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/