Re: [RFC PATCH 4/6] riscv: perf: Add raw event support

From: Anup Patel
Date: Mon Jun 29 2020 - 16:56:02 EST


On Mon, Jun 29, 2020 at 10:05 AM Zong Li <zong.li@xxxxxxxxxx> wrote:
>
> On Mon, Jun 29, 2020 at 12:17 PM Anup Patel <anup@xxxxxxxxxxxxxx> wrote:
> >
> > On Mon, Jun 29, 2020 at 8:49 AM Zong Li <zong.li@xxxxxxxxxx> wrote:
> > >
> > > Add support for raw events and hardware cache events. Currently, we set
> > > the events by writing the mhpmeventN CSRs, it would raise an illegal
> > > instruction exception and trap into m-mode to emulate event selector
> > > CSRs access. It doesn't make sense because we shouldn't write the
> > > m-mode CSRs in s-mode, it would be better that set events through SBI
> > > call or the shadow CSRs of s-mode. We would change it later.
> > >
> > > Signed-off-by: Zong Li <zong.li@xxxxxxxxxx>
> > > ---
> > > arch/riscv/include/asm/perf_event.h | 65 ++++++---
> > > arch/riscv/kernel/perf_event.c | 204 +++++++++++++++++++++++-----
> > > 2 files changed, 215 insertions(+), 54 deletions(-)
> > >
> > > diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
> > > index 062efd3a1d5d..41d515a1f331 100644
> > > --- a/arch/riscv/include/asm/perf_event.h
> > > +++ b/arch/riscv/include/asm/perf_event.h
> > > @@ -14,39 +14,64 @@
> > >
> > > #ifdef CONFIG_RISCV_BASE_PMU
> > > #define RISCV_BASE_COUNTERS 2
> > > +#define RISCV_EVENT_COUNTERS 29
> >
> > Same comment as DT documentation related to naming.
>
> Change it as well. Thanks.
>
> >
> > Regards,
> > Anup
> >
> >
> > > +#define RISCV_TOTAL_COUNTERS (RISCV_BASE_COUNTERS + RISCV_EVENT_COUNTERS)
> > >
> > > /*
> > > - * The RISCV_MAX_COUNTERS parameter should be specified.
> > > - */
> > > -
> > > -#define RISCV_MAX_COUNTERS 2
> > > -
> > > -/*
> > > - * These are the indexes of bits in counteren register *minus* 1,
> > > - * except for cycle. It would be coherent if it can directly mapped
> > > - * to counteren bit definition, but there is a *time* register at
> > > - * counteren[1]. Per-cpu structure is scarce resource here.
> > > - *
> > > * According to the spec, an implementation can support counter up to
> > > * mhpmcounter31, but many high-end processors has at most 6 general
> > > * PMCs, we give the definition to MHPMCOUNTER8 here.
> > > */
> > > -#define RISCV_PMU_CYCLE 0
> > > -#define RISCV_PMU_INSTRET 1
> > > -#define RISCV_PMU_MHPMCOUNTER3 2
> > > -#define RISCV_PMU_MHPMCOUNTER4 3
> > > -#define RISCV_PMU_MHPMCOUNTER5 4
> > > -#define RISCV_PMU_MHPMCOUNTER6 5
> > > -#define RISCV_PMU_MHPMCOUNTER7 6
> > > -#define RISCV_PMU_MHPMCOUNTER8 7
> > > +#define RISCV_PMU_CYCLE 0
> > > +#define RISCV_PMU_INSTRET 2
> > > +#define RISCV_PMU_HPMCOUNTER3 3
> > > +#define RISCV_PMU_HPMCOUNTER4 4
> > > +#define RISCV_PMU_HPMCOUNTER5 5
> > > +#define RISCV_PMU_HPMCOUNTER6 6
> > > +#define RISCV_PMU_HPMCOUNTER7 7
> > > +#define RISCV_PMU_HPMCOUNTER8 8
> > > +
> > > +#define RISCV_PMU_HPMCOUNTER_FIRST 3
> > > +#define RISCV_PMU_HPMCOUNTER_LAST \
> > > + (RISCV_PMU_HPMCOUNTER_FIRST + riscv_pmu->num_counters - 1)
> > >
> > > #define RISCV_OP_UNSUPP (-EOPNOTSUPP)
> > >
> > > +/* Hardware cache event encoding */
> > > +#define PERF_HW_CACHE_TYPE 0
> > > +#define PERF_HW_CACHE_OP 8
> > > +#define PERF_HW_CACHE_RESULT 16
> > > +#define PERF_HW_CACHE_MASK 0xff
> > > +
> > > +/* config_base encoding */
> > > +#define RISCV_PMU_TYPE_MASK 0x3
> > > +#define RISCV_PMU_TYPE_BASE 0x1
> > > +#define RISCV_PMU_TYPE_EVENT 0x2
> > > +#define RISCV_PMU_EXCLUDE_MASK 0xc
> > > +#define RISCV_PMU_EXCLUDE_USER 0x3
> > > +#define RISCV_PMU_EXCLUDE_KERNEL 0x4
> > > +
> > > +/*
> > > + * Currently, machine-mode supports emulation of mhpmeventN. Setting mhpmeventN
> > > + * to raise an illegal instruction exception to set event types in machine-mode.
> > > + * Eventually, we should set event types through standard SBI call or the shadow
> > > + * CSRs of supervisor-mode, because it is weird for writing CSR of machine-mode
> > > + * explicitly in supervisor-mode. These macro should be removed in the future.
> > > + */
> > > +#define CSR_MHPMEVENT3 0x323
> > > +#define CSR_MHPMEVENT4 0x324
> > > +#define CSR_MHPMEVENT5 0x325
> > > +#define CSR_MHPMEVENT6 0x326
> > > +#define CSR_MHPMEVENT7 0x327
> > > +#define CSR_MHPMEVENT8 0x328
> > > +
> > > struct cpu_hw_events {
> > > /* # currently enabled events*/
> > > int n_events;
> > > /* currently enabled events */
> > > - struct perf_event *events[RISCV_MAX_COUNTERS];
> > > + struct perf_event *events[RISCV_EVENT_COUNTERS];
> > > + /* bitmap of used event counters */
> > > + unsigned long used_cntr_mask;
> > > /* vendor-defined PMU data */
> > > void *platform;
> > > };
> > > diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
> > > index c835f0362d94..0cfcd6f1e57b 100644
> > > --- a/arch/riscv/kernel/perf_event.c
> > > +++ b/arch/riscv/kernel/perf_event.c
> > > @@ -139,6 +139,53 @@ static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
> > > },
> > > };
> > >
> > > +/*
> > > + * Methods for checking and getting PMU information
> > > + */
> > > +
> > > +static inline int is_base_counter(int idx)
> > > +{
> > > + return (idx == RISCV_PMU_CYCLE || idx == RISCV_PMU_INSTRET);
> > > +}
> > > +
> > > +static inline int is_event_counter(int idx)
> > > +{
> > > + return (idx >= RISCV_PMU_HPMCOUNTER_FIRST &&
> > > + idx <= RISCV_PMU_HPMCOUNTER_LAST);
> > > +}
> > > +
> > > +static inline int get_available_counter(struct perf_event *event)
> > > +{
> > > + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> > > + struct hw_perf_event *hwc = &event->hw;
> > > + unsigned long config_base = hwc->config_base & RISCV_PMU_TYPE_MASK;
> > > + unsigned long mask;
> > > + int ret;
> > > +
> > > + switch (config_base) {
> > > + case RISCV_PMU_TYPE_BASE:
> > > + ret = hwc->config;
> > > + if (WARN_ON_ONCE(!is_base_counter(ret)))
> > > + return -ENOSPC;
> > > + break;
> > > + case RISCV_PMU_TYPE_EVENT:
> > > + mask = ~cpuc->used_cntr_mask;
> > > + ret = find_next_bit(&mask, RISCV_PMU_HPMCOUNTER_LAST, 3);
> > > + if (WARN_ON_ONCE(!is_event_counter(ret)))
> > > + return -ENOSPC;
> > > + break;
> > > + default:
> > > + return -ENOENT;
> > > + }
> > > +
> > > + __set_bit(ret, &cpuc->used_cntr_mask);
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +/*
> > > + * Map generic hardware event
> > > + */
> > > static int riscv_map_hw_event(u64 config)
> > > {
> > > if (config >= riscv_pmu->max_events)
> > > @@ -147,32 +194,28 @@ static int riscv_map_hw_event(u64 config)
> > > return riscv_pmu->hw_events[config];
> > > }
> > >
> > > -static int riscv_map_cache_decode(u64 config, unsigned int *type,
> > > - unsigned int *op, unsigned int *result)
> > > -{
> > > - return -ENOENT;
> > > -}
> > > -
> > > +/*
> > > + * Map generic hardware cache event
> > > + */
> > > static int riscv_map_cache_event(u64 config)
> > > {
> > > unsigned int type, op, result;
> > > - int err = -ENOENT;
> > > - int code;
> > > + int ret;
> > >
> > > - err = riscv_map_cache_decode(config, &type, &op, &result);
> > > - if (!riscv_pmu->cache_events || err)
> > > - return err;
> > > + type = (config >> PERF_HW_CACHE_TYPE) & PERF_HW_CACHE_MASK;
> > > + op = (config >> PERF_HW_CACHE_OP) & PERF_HW_CACHE_MASK;
> > > + result = (config >> PERF_HW_CACHE_RESULT) & PERF_HW_CACHE_MASK;
> > >
> > > if (type >= PERF_COUNT_HW_CACHE_MAX ||
> > > op >= PERF_COUNT_HW_CACHE_OP_MAX ||
> > > result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
> > > return -EINVAL;
> > >
> > > - code = (*riscv_pmu->cache_events)[type][op][result];
> > > - if (code == RISCV_OP_UNSUPP)
> > > + ret = riscv_cache_event_map[type][op][result];
> > > + if (ret == RISCV_OP_UNSUPP)
> > > return -EINVAL;
> > >
> > > - return code;
> > > + return ret == RISCV_OP_UNSUPP ? -ENOENT : ret;
> > > }
> > >
> > > /*
> > > @@ -190,8 +233,27 @@ static inline u64 read_counter(int idx)
> > > case RISCV_PMU_INSTRET:
> > > val = csr_read(CSR_INSTRET);
> > > break;
> > > + case RISCV_PMU_HPMCOUNTER3:
> > > + val = csr_read(CSR_HPMCOUNTER3);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER4:
> > > + val = csr_read(CSR_HPMCOUNTER4);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER5:
> > > + val = csr_read(CSR_HPMCOUNTER5);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER6:
> > > + val = csr_read(CSR_HPMCOUNTER6);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER7:
> > > + val = csr_read(CSR_HPMCOUNTER7);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER8:
> > > + val = csr_read(CSR_HPMCOUNTER8);
> >
> > This is broken for RV32 because for RV32 we have to read two
> > CSRs to get a counter value.
>
> Oh yes, thanks for your reminder. Add them in the next version.
>
> >
> > Also, for correctly reading a 64bit counter on RV32 we have
> > to read just like get_cycles64() does for RV32.
> >
> > static inline u64 get_cycles64(void)
> > {
> > u32 hi, lo;
> >
> > do {
> > hi = get_cycles_hi();
> > lo = get_cycles();
> > } while (hi != get_cycles_hi());
> >
> > return ((u64)hi << 32) | lo;
> > }
> >
> > Regards,
> > Anup
> >
> >
> > > + break;
> > > default:
> > > - WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
> > > + WARN_ON_ONCE(idx < RISCV_PMU_CYCLE ||
> > > + idx > RISCV_TOTAL_COUNTERS);
> > > return -EINVAL;
> > > }
> > >
> > > @@ -204,6 +266,68 @@ static inline void write_counter(int idx, u64 value)
> > > WARN_ON_ONCE(1);
> > > }
> > >
> > > +static inline void write_event(int idx, u64 value)
> > > +{
> > > + /* TODO: We shouldn't write CSR of m-mode explicitly here. Ideally,
> > > + * it need to set the event selector by SBI call or the s-mode
> > > + * shadow CSRs of them. Exploit illegal instruction exception to
> > > + * emulate mhpmcounterN access in m-mode.
> > > + */
> > > + switch (idx) {
> > > + case RISCV_PMU_HPMCOUNTER3:
> > > + csr_write(CSR_MHPMEVENT3, value);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER4:
> > > + csr_write(CSR_MHPMEVENT4, value);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER5:
> > > + csr_write(CSR_MHPMEVENT5, value);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER6:
> > > + csr_write(CSR_MHPMEVENT6, value);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER7:
> > > + csr_write(CSR_MHPMEVENT7, value);
> > > + break;
> > > + case RISCV_PMU_HPMCOUNTER8:
> > > + csr_write(CSR_MHPMEVENT8, value);
> > > + break;
> > > + default:
> > > + WARN_ON_ONCE(idx < RISCV_PMU_HPMCOUNTER3 ||
> > > + idx > RISCV_TOTAL_COUNTERS);
> > > + return;
> > > + }
> > > +}
>
> I was also wondering if you have any suggestions about the PMU SBI
> extension as I mentioned in the cover letter. Currently, we set the
> event selectors by emulation of OpenSBI, so just write the m-mode CSRs
> as above.

That's a separate topic but design of this driver will pave way for
defining SBI perf counter calls. Let's get this driver in good shape
first so that it helps us in defining SBI calls for SBI-level perf counters.

Regards,
Anup

>
> > > +
> > > +/*
> > > + * Enable and disable event counters
> > > + */
> > > +
> > > +static inline void riscv_pmu_enable_event(struct perf_event *event)
> > > +{
> > > + struct hw_perf_event *hwc = &event->hw;
> > > + int idx = hwc->idx;
> > > +
> > > + if (is_event_counter(idx))
> > > + write_event(idx, hwc->config);
> > > +
> > > + /*
> > > + * Since we cannot write to counters, this serves as an initialization
> > > + * to the delta-mechanism in pmu->read(); otherwise, the delta would be
> > > + * wrong when pmu->read is called for the first time.
> > > + */
> > > + local64_set(&hwc->prev_count, read_counter(hwc->idx));
> > > +}
> > > +
> > > +static inline void riscv_pmu_disable_event(struct perf_event *event)
> > > +{
> > > + struct hw_perf_event *hwc = &event->hw;
> > > + int idx = hwc->idx;
> > > +
> > > + if (is_event_counter(idx))
> > > + write_event(idx, 0);
> > > +}
> > > +
> > > /*
> > > * pmu->read: read and update the counter
> > > *
> > > @@ -232,6 +356,7 @@ static void riscv_pmu_read(struct perf_event *event)
> > > */
> > > delta = (new_raw_count - prev_raw_count) &
> > > ((1ULL << riscv_pmu->counter_width) - 1);
> > > +
> > > local64_add(delta, &event->count);
> > > /*
> > > * Something like local64_sub(delta, &hwc->period_left) here is
> > > @@ -252,6 +377,11 @@ static void riscv_pmu_stop(struct perf_event *event, int flags)
> > > {
> > > struct hw_perf_event *hwc = &event->hw;
> > >
> > > + if (WARN_ON_ONCE(hwc->idx == -1))
> > > + return;
> > > +
> > > + riscv_pmu_disable_event(event);
> > > +
> > > WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
> > > hwc->state |= PERF_HES_STOPPED;
> > >
> > > @@ -271,6 +401,9 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
> > > if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> > > return;
> > >
> > > + if (WARN_ON_ONCE(hwc->idx == -1))
> > > + return;
> > > +
> > > if (flags & PERF_EF_RELOAD) {
> > > WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
> > >
> > > @@ -281,14 +414,10 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
> > > }
> > >
> > > hwc->state = 0;
> > > - perf_event_update_userpage(event);
> > >
> > > - /*
> > > - * Since we cannot write to counters, this serves as an initialization
> > > - * to the delta-mechanism in pmu->read(); otherwise, the delta would be
> > > - * wrong when pmu->read is called for the first time.
> > > - */
> > > - local64_set(&hwc->prev_count, read_counter(hwc->idx));
> > > + riscv_pmu_enable_event(event);
> > > +
> > > + perf_event_update_userpage(event);
> > > }
> > >
> > > /*
> > > @@ -298,21 +427,18 @@ static int riscv_pmu_add(struct perf_event *event, int flags)
> > > {
> > > struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> > > struct hw_perf_event *hwc = &event->hw;
> > > + int count_idx;
> > >
> > > if (cpuc->n_events == riscv_pmu->num_counters)
> > > return -ENOSPC;
> > >
> > > - /*
> > > - * We don't have general conunters, so no binding-event-to-counter
> > > - * process here.
> > > - *
> > > - * Indexing using hwc->config generally not works, since config may
> > > - * contain extra information, but here the only info we have in
> > > - * hwc->config is the event index.
> > > - */
> > > - hwc->idx = hwc->config;
> > > - cpuc->events[hwc->idx] = event;
> > > + count_idx = get_available_counter(event);
> > > + if (count_idx < 0)
> > > + return -ENOSPC;
> > > +
> > > cpuc->n_events++;
> > > + hwc->idx = count_idx;
> > > + cpuc->events[hwc->idx] = event;
> > >
> > > hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> > >
> > > @@ -330,8 +456,10 @@ static void riscv_pmu_del(struct perf_event *event, int flags)
> > > struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> > > struct hw_perf_event *hwc = &event->hw;
> > >
> > > - cpuc->events[hwc->idx] = NULL;
> > > cpuc->n_events--;
> > > + __clear_bit(hwc->idx, &cpuc->used_cntr_mask);
> > > +
> > > + cpuc->events[hwc->idx] = NULL;
> > > riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
> > > perf_event_update_userpage(event);
> > > }
> > > @@ -385,6 +513,7 @@ static int riscv_event_init(struct perf_event *event)
> > > {
> > > struct perf_event_attr *attr = &event->attr;
> > > struct hw_perf_event *hwc = &event->hw;
> > > + unsigned long config_base = 0;
> > > int err;
> > > int code;
> > >
> > > @@ -406,11 +535,17 @@ static int riscv_event_init(struct perf_event *event)
> > > code = riscv_pmu->map_cache_event(attr->config);
> > > break;
> > > case PERF_TYPE_RAW:
> > > - return -EOPNOTSUPP;
> > > + code = attr->config;
> > > + break;
> > > default:
> > > return -ENOENT;
> > > }
> > >
> > > + if (is_base_counter(code))
> > > + config_base |= RISCV_PMU_TYPE_BASE;
> > > + else
> > > + config_base |= RISCV_PMU_TYPE_EVENT;
> > > +
> > > event->destroy = riscv_event_destroy;
> > > if (code < 0) {
> > > event->destroy(event);
> > > @@ -424,6 +559,7 @@ static int riscv_event_init(struct perf_event *event)
> > > * But since we don't have such support, later in pmu->add(), we just
> > > * use hwc->config as the index instead.
> > > */
> > > + hwc->config_base = config_base;
> > > hwc->config = code;
> > > hwc->idx = -1;
> > >
> > > --
> > > 2.27.0
> > >