Re: [RFC PATCH 4/6] riscv: perf: Add raw event support

From: Zong Li
Date: Mon Jun 29 2020 - 16:42:10 EST


On Mon, Jun 29, 2020 at 12:17 PM Anup Patel <anup@xxxxxxxxxxxxxx> wrote:
>
> On Mon, Jun 29, 2020 at 8:49 AM Zong Li <zong.li@xxxxxxxxxx> wrote:
> >
> > Add support for raw events and hardware cache events. Currently, we set
> > the events by writing the mhpmeventN CSRs directly; each write raises an
> > illegal instruction exception and traps into m-mode, which emulates the
> > event selector CSR access. This doesn't make sense, because we shouldn't
> > write m-mode CSRs from s-mode; it would be better to set events through an
> > SBI call or the s-mode shadow CSRs. We will change this later.
> >
> > Signed-off-by: Zong Li <zong.li@xxxxxxxxxx>
> > ---
> > arch/riscv/include/asm/perf_event.h | 65 ++++++---
> > arch/riscv/kernel/perf_event.c | 204 +++++++++++++++++++++++-----
> > 2 files changed, 215 insertions(+), 54 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
> > index 062efd3a1d5d..41d515a1f331 100644
> > --- a/arch/riscv/include/asm/perf_event.h
> > +++ b/arch/riscv/include/asm/perf_event.h
> > @@ -14,39 +14,64 @@
> >
> > #ifdef CONFIG_RISCV_BASE_PMU
> > #define RISCV_BASE_COUNTERS 2
> > +#define RISCV_EVENT_COUNTERS 29
>
> Same comment as DT documentation related to naming.

I'll change it as well. Thanks.

>
> Regards,
> Anup
>
>
> > +#define RISCV_TOTAL_COUNTERS (RISCV_BASE_COUNTERS + RISCV_EVENT_COUNTERS)
> >
> > /*
> > - * The RISCV_MAX_COUNTERS parameter should be specified.
> > - */
> > -
> > -#define RISCV_MAX_COUNTERS 2
> > -
> > -/*
> > - * These are the indexes of bits in counteren register *minus* 1,
> > - * except for cycle. It would be coherent if it can directly mapped
> > - * to counteren bit definition, but there is a *time* register at
> > - * counteren[1]. Per-cpu structure is scarce resource here.
> > - *
> > * According to the spec, an implementation can support counter up to
> > * mhpmcounter31, but many high-end processors has at most 6 general
> > * PMCs, we give the definition to MHPMCOUNTER8 here.
> > */
> > -#define RISCV_PMU_CYCLE 0
> > -#define RISCV_PMU_INSTRET 1
> > -#define RISCV_PMU_MHPMCOUNTER3 2
> > -#define RISCV_PMU_MHPMCOUNTER4 3
> > -#define RISCV_PMU_MHPMCOUNTER5 4
> > -#define RISCV_PMU_MHPMCOUNTER6 5
> > -#define RISCV_PMU_MHPMCOUNTER7 6
> > -#define RISCV_PMU_MHPMCOUNTER8 7
> > +#define RISCV_PMU_CYCLE 0
> > +#define RISCV_PMU_INSTRET 2
> > +#define RISCV_PMU_HPMCOUNTER3 3
> > +#define RISCV_PMU_HPMCOUNTER4 4
> > +#define RISCV_PMU_HPMCOUNTER5 5
> > +#define RISCV_PMU_HPMCOUNTER6 6
> > +#define RISCV_PMU_HPMCOUNTER7 7
> > +#define RISCV_PMU_HPMCOUNTER8 8
> > +
> > +#define RISCV_PMU_HPMCOUNTER_FIRST 3
> > +#define RISCV_PMU_HPMCOUNTER_LAST \
> > + (RISCV_PMU_HPMCOUNTER_FIRST + riscv_pmu->num_counters - 1)
> >
> > #define RISCV_OP_UNSUPP (-EOPNOTSUPP)
> >
> > +/* Hardware cache event encoding */
> > +#define PERF_HW_CACHE_TYPE 0
> > +#define PERF_HW_CACHE_OP 8
> > +#define PERF_HW_CACHE_RESULT 16
> > +#define PERF_HW_CACHE_MASK 0xff
> > +
> > +/* config_base encoding */
> > +#define RISCV_PMU_TYPE_MASK 0x3
> > +#define RISCV_PMU_TYPE_BASE 0x1
> > +#define RISCV_PMU_TYPE_EVENT 0x2
> > +#define RISCV_PMU_EXCLUDE_MASK 0xc
> > +#define RISCV_PMU_EXCLUDE_USER 0x3
> > +#define RISCV_PMU_EXCLUDE_KERNEL 0x4
> > +
> > +/*
> > + * Currently, machine-mode emulates mhpmeventN: writing mhpmeventN here raises
> > + * an illegal instruction exception, and machine-mode sets the event type.
> > + * Eventually, we should set event types through a standard SBI call or the
> > + * supervisor-mode shadow CSRs, since explicitly writing machine-mode CSRs from
> > + * supervisor-mode is weird. These macros should be removed in the future.
> > + */
> > +#define CSR_MHPMEVENT3 0x323
> > +#define CSR_MHPMEVENT4 0x324
> > +#define CSR_MHPMEVENT5 0x325
> > +#define CSR_MHPMEVENT6 0x326
> > +#define CSR_MHPMEVENT7 0x327
> > +#define CSR_MHPMEVENT8 0x328
> > +
> > struct cpu_hw_events {
> > /* # currently enabled events*/
> > int n_events;
> > /* currently enabled events */
> > - struct perf_event *events[RISCV_MAX_COUNTERS];
> > + struct perf_event *events[RISCV_EVENT_COUNTERS];
> > + /* bitmap of used event counters */
> > + unsigned long used_cntr_mask;
> > /* vendor-defined PMU data */
> > void *platform;
> > };
> > diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
> > index c835f0362d94..0cfcd6f1e57b 100644
> > --- a/arch/riscv/kernel/perf_event.c
> > +++ b/arch/riscv/kernel/perf_event.c
> > @@ -139,6 +139,53 @@ static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
> > },
> > };
> >
> > +/*
> > + * Methods for checking and getting PMU information
> > + */
> > +
> > +static inline int is_base_counter(int idx)
> > +{
> > + return (idx == RISCV_PMU_CYCLE || idx == RISCV_PMU_INSTRET);
> > +}
> > +
> > +static inline int is_event_counter(int idx)
> > +{
> > + return (idx >= RISCV_PMU_HPMCOUNTER_FIRST &&
> > + idx <= RISCV_PMU_HPMCOUNTER_LAST);
> > +}
> > +
> > +static inline int get_available_counter(struct perf_event *event)
> > +{
> > + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> > + struct hw_perf_event *hwc = &event->hw;
> > + unsigned long config_base = hwc->config_base & RISCV_PMU_TYPE_MASK;
> > + unsigned long mask;
> > + int ret;
> > +
> > + switch (config_base) {
> > + case RISCV_PMU_TYPE_BASE:
> > + ret = hwc->config;
> > + if (WARN_ON_ONCE(!is_base_counter(ret)))
> > + return -ENOSPC;
> > + break;
> > + case RISCV_PMU_TYPE_EVENT:
> > + mask = ~cpuc->used_cntr_mask;
> > + ret = find_next_bit(&mask, RISCV_PMU_HPMCOUNTER_LAST, 3);
> > + if (WARN_ON_ONCE(!is_event_counter(ret)))
> > + return -ENOSPC;
> > + break;
> > + default:
> > + return -ENOENT;
> > + }
> > +
> > + __set_bit(ret, &cpuc->used_cntr_mask);
> > +
> > + return ret;
> > +}
> > +
> > +/*
> > + * Map generic hardware event
> > + */
> > static int riscv_map_hw_event(u64 config)
> > {
> > if (config >= riscv_pmu->max_events)
> > @@ -147,32 +194,28 @@ static int riscv_map_hw_event(u64 config)
> > return riscv_pmu->hw_events[config];
> > }
> >
> > -static int riscv_map_cache_decode(u64 config, unsigned int *type,
> > - unsigned int *op, unsigned int *result)
> > -{
> > - return -ENOENT;
> > -}
> > -
> > +/*
> > + * Map generic hardware cache event
> > + */
> > static int riscv_map_cache_event(u64 config)
> > {
> > unsigned int type, op, result;
> > - int err = -ENOENT;
> > - int code;
> > + int ret;
> >
> > - err = riscv_map_cache_decode(config, &type, &op, &result);
> > - if (!riscv_pmu->cache_events || err)
> > - return err;
> > + type = (config >> PERF_HW_CACHE_TYPE) & PERF_HW_CACHE_MASK;
> > + op = (config >> PERF_HW_CACHE_OP) & PERF_HW_CACHE_MASK;
> > + result = (config >> PERF_HW_CACHE_RESULT) & PERF_HW_CACHE_MASK;
> >
> > if (type >= PERF_COUNT_HW_CACHE_MAX ||
> > op >= PERF_COUNT_HW_CACHE_OP_MAX ||
> > result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
> > return -EINVAL;
> >
> > - code = (*riscv_pmu->cache_events)[type][op][result];
> > - if (code == RISCV_OP_UNSUPP)
> > + ret = riscv_cache_event_map[type][op][result];
> > + if (ret == RISCV_OP_UNSUPP)
> > return -EINVAL;
> >
> > - return code;
> > + return ret == RISCV_OP_UNSUPP ? -ENOENT : ret;
> > }
> >
> > /*
> > @@ -190,8 +233,27 @@ static inline u64 read_counter(int idx)
> > case RISCV_PMU_INSTRET:
> > val = csr_read(CSR_INSTRET);
> > break;
> > + case RISCV_PMU_HPMCOUNTER3:
> > + val = csr_read(CSR_HPMCOUNTER3);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER4:
> > + val = csr_read(CSR_HPMCOUNTER4);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER5:
> > + val = csr_read(CSR_HPMCOUNTER5);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER6:
> > + val = csr_read(CSR_HPMCOUNTER6);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER7:
> > + val = csr_read(CSR_HPMCOUNTER7);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER8:
> > + val = csr_read(CSR_HPMCOUNTER8);
>
> This is broken for RV32 because for RV32 we have to read two
> CSRs to get a counter value.

Oh yes, thanks for the reminder. I'll add them in the next version.

>
> Also, for correctly reading a 64bit counter on RV32 we have
> to read just like get_cycles64() does for RV32.
>
> static inline u64 get_cycles64(void)
> {
> u32 hi, lo;
>
> do {
> hi = get_cycles_hi();
> lo = get_cycles();
> } while (hi != get_cycles_hi());
>
> return ((u64)hi << 32) | lo;
> }
>
> Regards,
> Anup
>
>
> > + break;
> > default:
> > - WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
> > + WARN_ON_ONCE(idx < RISCV_PMU_CYCLE ||
> > + idx > RISCV_TOTAL_COUNTERS);
> > return -EINVAL;
> > }
> >
> > @@ -204,6 +266,68 @@ static inline void write_counter(int idx, u64 value)
> > WARN_ON_ONCE(1);
> > }
> >
> > +static inline void write_event(int idx, u64 value)
> > +{
> > + /* TODO: We shouldn't write m-mode CSRs explicitly here. Ideally,
> > + * the event selectors should be set through an SBI call or their
> > + * s-mode shadow CSRs. For now, exploit the illegal instruction
> > + * exception so that m-mode emulates the mhpmeventN access.
> > + */
> > + switch (idx) {
> > + case RISCV_PMU_HPMCOUNTER3:
> > + csr_write(CSR_MHPMEVENT3, value);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER4:
> > + csr_write(CSR_MHPMEVENT4, value);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER5:
> > + csr_write(CSR_MHPMEVENT5, value);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER6:
> > + csr_write(CSR_MHPMEVENT6, value);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER7:
> > + csr_write(CSR_MHPMEVENT7, value);
> > + break;
> > + case RISCV_PMU_HPMCOUNTER8:
> > + csr_write(CSR_MHPMEVENT8, value);
> > + break;
> > + default:
> > + WARN_ON_ONCE(idx < RISCV_PMU_HPMCOUNTER3 ||
> > + idx > RISCV_TOTAL_COUNTERS);
> > + return;
> > + }
> > +}

I was also wondering if you have any suggestions about the PMU SBI
extension that I mentioned in the cover letter. Currently, we rely on
OpenSBI's emulation to set the event selectors, so we just write the
m-mode CSRs as shown above.

> > +
> > +/*
> > + * Enable and disable event counters
> > + */
> > +
> > +static inline void riscv_pmu_enable_event(struct perf_event *event)
> > +{
> > + struct hw_perf_event *hwc = &event->hw;
> > + int idx = hwc->idx;
> > +
> > + if (is_event_counter(idx))
> > + write_event(idx, hwc->config);
> > +
> > + /*
> > + * Since we cannot write to counters, this serves as an initialization
> > + * to the delta-mechanism in pmu->read(); otherwise, the delta would be
> > + * wrong when pmu->read is called for the first time.
> > + */
> > + local64_set(&hwc->prev_count, read_counter(hwc->idx));
> > +}
> > +
> > +static inline void riscv_pmu_disable_event(struct perf_event *event)
> > +{
> > + struct hw_perf_event *hwc = &event->hw;
> > + int idx = hwc->idx;
> > +
> > + if (is_event_counter(idx))
> > + write_event(idx, 0);
> > +}
> > +
> > /*
> > * pmu->read: read and update the counter
> > *
> > @@ -232,6 +356,7 @@ static void riscv_pmu_read(struct perf_event *event)
> > */
> > delta = (new_raw_count - prev_raw_count) &
> > ((1ULL << riscv_pmu->counter_width) - 1);
> > +
> > local64_add(delta, &event->count);
> > /*
> > * Something like local64_sub(delta, &hwc->period_left) here is
> > @@ -252,6 +377,11 @@ static void riscv_pmu_stop(struct perf_event *event, int flags)
> > {
> > struct hw_perf_event *hwc = &event->hw;
> >
> > + if (WARN_ON_ONCE(hwc->idx == -1))
> > + return;
> > +
> > + riscv_pmu_disable_event(event);
> > +
> > WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
> > hwc->state |= PERF_HES_STOPPED;
> >
> > @@ -271,6 +401,9 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
> > if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> > return;
> >
> > + if (WARN_ON_ONCE(hwc->idx == -1))
> > + return;
> > +
> > if (flags & PERF_EF_RELOAD) {
> > WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> >
> > @@ -281,14 +414,10 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
> > }
> >
> > hwc->state = 0;
> > - perf_event_update_userpage(event);
> >
> > - /*
> > - * Since we cannot write to counters, this serves as an initialization
> > - * to the delta-mechanism in pmu->read(); otherwise, the delta would be
> > - * wrong when pmu->read is called for the first time.
> > - */
> > - local64_set(&hwc->prev_count, read_counter(hwc->idx));
> > + riscv_pmu_enable_event(event);
> > +
> > + perf_event_update_userpage(event);
> > }
> >
> > /*
> > @@ -298,21 +427,18 @@ static int riscv_pmu_add(struct perf_event *event, int flags)
> > {
> > struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> > struct hw_perf_event *hwc = &event->hw;
> > + int count_idx;
> >
> > if (cpuc->n_events == riscv_pmu->num_counters)
> > return -ENOSPC;
> >
> > - /*
> > - * We don't have general conunters, so no binding-event-to-counter
> > - * process here.
> > - *
> > - * Indexing using hwc->config generally not works, since config may
> > - * contain extra information, but here the only info we have in
> > - * hwc->config is the event index.
> > - */
> > - hwc->idx = hwc->config;
> > - cpuc->events[hwc->idx] = event;
> > + count_idx = get_available_counter(event);
> > + if (count_idx < 0)
> > + return -ENOSPC;
> > +
> > cpuc->n_events++;
> > + hwc->idx = count_idx;
> > + cpuc->events[hwc->idx] = event;
> >
> > hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> >
> > @@ -330,8 +456,10 @@ static void riscv_pmu_del(struct perf_event *event, int flags)
> > struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> > struct hw_perf_event *hwc = &event->hw;
> >
> > - cpuc->events[hwc->idx] = NULL;
> > cpuc->n_events--;
> > + __clear_bit(hwc->idx, &cpuc->used_cntr_mask);
> > +
> > + cpuc->events[hwc->idx] = NULL;
> > riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
> > perf_event_update_userpage(event);
> > }
> > @@ -385,6 +513,7 @@ static int riscv_event_init(struct perf_event *event)
> > {
> > struct perf_event_attr *attr = &event->attr;
> > struct hw_perf_event *hwc = &event->hw;
> > + unsigned long config_base = 0;
> > int err;
> > int code;
> >
> > @@ -406,11 +535,17 @@ static int riscv_event_init(struct perf_event *event)
> > code = riscv_pmu->map_cache_event(attr->config);
> > break;
> > case PERF_TYPE_RAW:
> > - return -EOPNOTSUPP;
> > + code = attr->config;
> > + break;
> > default:
> > return -ENOENT;
> > }
> >
> > + if (is_base_counter(code))
> > + config_base |= RISCV_PMU_TYPE_BASE;
> > + else
> > + config_base |= RISCV_PMU_TYPE_EVENT;
> > +
> > event->destroy = riscv_event_destroy;
> > if (code < 0) {
> > event->destroy(event);
> > @@ -424,6 +559,7 @@ static int riscv_event_init(struct perf_event *event)
> > * But since we don't have such support, later in pmu->add(), we just
> > * use hwc->config as the index instead.
> > */
> > + hwc->config_base = config_base;
> > hwc->config = code;
> > hwc->idx = -1;
> >
> > --
> > 2.27.0
> >