Re: [RFC PATCH 4/6] riscv: perf: Add raw event support

From: Anup Patel
Date: Mon Jun 29 2020 - 16:58:18 EST


On Mon, Jun 29, 2020 at 8:49 AM Zong Li <zong.li@xxxxxxxxxx> wrote:
>
> Add support for raw events and hardware cache events. Currently, we set
> the events by writing the mhpmeventN CSRs, it would raise an illegal
> instruction exception and trap into m-mode to emulate event selector
> CSRs access. It doesn't make sense because we shouldn't write the
> m-mode CSRs in s-mode, it would be better that set events through SBI
> call or the shadow CSRs of s-mode. We would change it later.
>
> Signed-off-by: Zong Li <zong.li@xxxxxxxxxx>
> ---
> arch/riscv/include/asm/perf_event.h | 65 ++++++---
> arch/riscv/kernel/perf_event.c | 204 +++++++++++++++++++++++-----
> 2 files changed, 215 insertions(+), 54 deletions(-)
>
> diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
> index 062efd3a1d5d..41d515a1f331 100644
> --- a/arch/riscv/include/asm/perf_event.h
> +++ b/arch/riscv/include/asm/perf_event.h
> @@ -14,39 +14,64 @@
>
> #ifdef CONFIG_RISCV_BASE_PMU
> #define RISCV_BASE_COUNTERS 2
> +#define RISCV_EVENT_COUNTERS 29

Same comment as DT documentation related to naming.

Regards,
Anup


> +#define RISCV_TOTAL_COUNTERS (RISCV_BASE_COUNTERS + RISCV_EVENT_COUNTERS)
>
> /*
> - * The RISCV_MAX_COUNTERS parameter should be specified.
> - */
> -
> -#define RISCV_MAX_COUNTERS 2
> -
> -/*
> - * These are the indexes of bits in counteren register *minus* 1,
> - * except for cycle. It would be coherent if it can directly mapped
> - * to counteren bit definition, but there is a *time* register at
> - * counteren[1]. Per-cpu structure is scarce resource here.
> - *
> * According to the spec, an implementation can support counter up to
> * mhpmcounter31, but many high-end processors has at most 6 general
> * PMCs, we give the definition to MHPMCOUNTER8 here.
> */
> -#define RISCV_PMU_CYCLE 0
> -#define RISCV_PMU_INSTRET 1
> -#define RISCV_PMU_MHPMCOUNTER3 2
> -#define RISCV_PMU_MHPMCOUNTER4 3
> -#define RISCV_PMU_MHPMCOUNTER5 4
> -#define RISCV_PMU_MHPMCOUNTER6 5
> -#define RISCV_PMU_MHPMCOUNTER7 6
> -#define RISCV_PMU_MHPMCOUNTER8 7
> +#define RISCV_PMU_CYCLE 0
> +#define RISCV_PMU_INSTRET 2
> +#define RISCV_PMU_HPMCOUNTER3 3
> +#define RISCV_PMU_HPMCOUNTER4 4
> +#define RISCV_PMU_HPMCOUNTER5 5
> +#define RISCV_PMU_HPMCOUNTER6 6
> +#define RISCV_PMU_HPMCOUNTER7 7
> +#define RISCV_PMU_HPMCOUNTER8 8
> +
> +#define RISCV_PMU_HPMCOUNTER_FIRST 3
> +#define RISCV_PMU_HPMCOUNTER_LAST \
> + (RISCV_PMU_HPMCOUNTER_FIRST + riscv_pmu->num_counters - 1)
>
> #define RISCV_OP_UNSUPP (-EOPNOTSUPP)
>
> +/* Hardware cache event encoding */
> +#define PERF_HW_CACHE_TYPE 0
> +#define PERF_HW_CACHE_OP 8
> +#define PERF_HW_CACHE_RESULT 16
> +#define PERF_HW_CACHE_MASK 0xff
> +
> +/* config_base encoding */
> +#define RISCV_PMU_TYPE_MASK 0x3
> +#define RISCV_PMU_TYPE_BASE 0x1
> +#define RISCV_PMU_TYPE_EVENT 0x2
> +#define RISCV_PMU_EXCLUDE_MASK 0xc
> +#define RISCV_PMU_EXCLUDE_USER 0x3
> +#define RISCV_PMU_EXCLUDE_KERNEL 0x4
> +
> +/*
> + * Currently, machine-mode supports emulation of mhpmeventN. Setting mhpmeventN
> + * to raise an illegal instruction exception to set event types in machine-mode.
> + * Eventually, we should set event types through standard SBI call or the shadow
> + * CSRs of supervisor-mode, because it is weird for writing CSR of machine-mode
> + * explicitly in supervisor-mode. These macro should be removed in the future.
> + */
> +#define CSR_MHPMEVENT3 0x323
> +#define CSR_MHPMEVENT4 0x324
> +#define CSR_MHPMEVENT5 0x325
> +#define CSR_MHPMEVENT6 0x326
> +#define CSR_MHPMEVENT7 0x327
> +#define CSR_MHPMEVENT8 0x328
> +
> struct cpu_hw_events {
> /* # currently enabled events*/
> int n_events;
> /* currently enabled events */
> - struct perf_event *events[RISCV_MAX_COUNTERS];
> + struct perf_event *events[RISCV_EVENT_COUNTERS];
> + /* bitmap of used event counters */
> + unsigned long used_cntr_mask;
> /* vendor-defined PMU data */
> void *platform;
> };
> diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
> index c835f0362d94..0cfcd6f1e57b 100644
> --- a/arch/riscv/kernel/perf_event.c
> +++ b/arch/riscv/kernel/perf_event.c
> @@ -139,6 +139,53 @@ static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
> },
> };
>
> +/*
> + * Methods for checking and getting PMU information
> + */
> +
> +static inline int is_base_counter(int idx)
> +{
> + return (idx == RISCV_PMU_CYCLE || idx == RISCV_PMU_INSTRET);
> +}
> +
> +static inline int is_event_counter(int idx)
> +{
> + return (idx >= RISCV_PMU_HPMCOUNTER_FIRST &&
> + idx <= RISCV_PMU_HPMCOUNTER_LAST);
> +}
> +
> +static inline int get_available_counter(struct perf_event *event)
> +{
> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> + struct hw_perf_event *hwc = &event->hw;
> + unsigned long config_base = hwc->config_base & RISCV_PMU_TYPE_MASK;
> + unsigned long mask;
> + int ret;
> +
> + switch (config_base) {
> + case RISCV_PMU_TYPE_BASE:
> + ret = hwc->config;
> + if (WARN_ON_ONCE(!is_base_counter(ret)))
> + return -ENOSPC;
> + break;
> + case RISCV_PMU_TYPE_EVENT:
> + mask = ~cpuc->used_cntr_mask;
> + ret = find_next_bit(&mask, RISCV_PMU_HPMCOUNTER_LAST, 3);
> + if (WARN_ON_ONCE(!is_event_counter(ret)))
> + return -ENOSPC;
> + break;
> + default:
> + return -ENOENT;
> + }
> +
> + __set_bit(ret, &cpuc->used_cntr_mask);
> +
> + return ret;
> +}
> +
> +/*
> + * Map generic hardware event
> + */
> static int riscv_map_hw_event(u64 config)
> {
> if (config >= riscv_pmu->max_events)
> @@ -147,32 +194,28 @@ static int riscv_map_hw_event(u64 config)
> return riscv_pmu->hw_events[config];
> }
>
> -static int riscv_map_cache_decode(u64 config, unsigned int *type,
> - unsigned int *op, unsigned int *result)
> -{
> - return -ENOENT;
> -}
> -
> +/*
> + * Map generic hardware cache event
> + */
> static int riscv_map_cache_event(u64 config)
> {
> unsigned int type, op, result;
> - int err = -ENOENT;
> - int code;
> + int ret;
>
> - err = riscv_map_cache_decode(config, &type, &op, &result);
> - if (!riscv_pmu->cache_events || err)
> - return err;
> + type = (config >> PERF_HW_CACHE_TYPE) & PERF_HW_CACHE_MASK;
> + op = (config >> PERF_HW_CACHE_OP) & PERF_HW_CACHE_MASK;
> + result = (config >> PERF_HW_CACHE_RESULT) & PERF_HW_CACHE_MASK;
>
> if (type >= PERF_COUNT_HW_CACHE_MAX ||
> op >= PERF_COUNT_HW_CACHE_OP_MAX ||
> result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
> return -EINVAL;
>
> - code = (*riscv_pmu->cache_events)[type][op][result];
> - if (code == RISCV_OP_UNSUPP)
> + ret = riscv_cache_event_map[type][op][result];
> + if (ret == RISCV_OP_UNSUPP)
> return -EINVAL;
>
> - return code;
> + return ret == RISCV_OP_UNSUPP ? -ENOENT : ret;
> }
>
> /*
> @@ -190,8 +233,27 @@ static inline u64 read_counter(int idx)
> case RISCV_PMU_INSTRET:
> val = csr_read(CSR_INSTRET);
> break;
> + case RISCV_PMU_HPMCOUNTER3:
> + val = csr_read(CSR_HPMCOUNTER3);
> + break;
> + case RISCV_PMU_HPMCOUNTER4:
> + val = csr_read(CSR_HPMCOUNTER4);
> + break;
> + case RISCV_PMU_HPMCOUNTER5:
> + val = csr_read(CSR_HPMCOUNTER5);
> + break;
> + case RISCV_PMU_HPMCOUNTER6:
> + val = csr_read(CSR_HPMCOUNTER6);
> + break;
> + case RISCV_PMU_HPMCOUNTER7:
> + val = csr_read(CSR_HPMCOUNTER7);
> + break;
> + case RISCV_PMU_HPMCOUNTER8:
> + val = csr_read(CSR_HPMCOUNTER8);

This is broken for RV32 because for RV32 we have to read two
CSRs to get a counter value.

Also, for correctly reading a 64bit counter on RV32 we have
to read just like get_cycles64() does for RV32.

static inline u64 get_cycles64(void)
{
u32 hi, lo;

do {
hi = get_cycles_hi();
lo = get_cycles();
} while (hi != get_cycles_hi());

return ((u64)hi << 32) | lo;
}

Regards,
Anup


> + break;
> default:
> - WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
> + WARN_ON_ONCE(idx < RISCV_PMU_CYCLE ||
> + idx > RISCV_TOTAL_COUNTERS);
> return -EINVAL;
> }
>
> @@ -204,6 +266,68 @@ static inline void write_counter(int idx, u64 value)
> WARN_ON_ONCE(1);
> }
>
> +static inline void write_event(int idx, u64 value)
> +{
> + /* TODO: We shouldn't write CSR of m-mode explicitly here. Ideally,
> + * it need to set the event selector by SBI call or the s-mode
> + * shadow CSRs of them. Exploit illegal instruction exception to
> + * emulate mhpmcounterN access in m-mode.
> + */
> + switch (idx) {
> + case RISCV_PMU_HPMCOUNTER3:
> + csr_write(CSR_MHPMEVENT3, value);
> + break;
> + case RISCV_PMU_HPMCOUNTER4:
> + csr_write(CSR_MHPMEVENT4, value);
> + break;
> + case RISCV_PMU_HPMCOUNTER5:
> + csr_write(CSR_MHPMEVENT5, value);
> + break;
> + case RISCV_PMU_HPMCOUNTER6:
> + csr_write(CSR_MHPMEVENT6, value);
> + break;
> + case RISCV_PMU_HPMCOUNTER7:
> + csr_write(CSR_MHPMEVENT7, value);
> + break;
> + case RISCV_PMU_HPMCOUNTER8:
> + csr_write(CSR_MHPMEVENT8, value);
> + break;
> + default:
> + WARN_ON_ONCE(idx < RISCV_PMU_HPMCOUNTER3 ||
> + idx > RISCV_TOTAL_COUNTERS);
> + return;
> + }
> +}
> +
> +/*
> + * Enable and disable event counters
> + */
> +
> +static inline void riscv_pmu_enable_event(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + int idx = hwc->idx;
> +
> + if (is_event_counter(idx))
> + write_event(idx, hwc->config);
> +
> + /*
> + * Since we cannot write to counters, this serves as an initialization
> + * to the delta-mechanism in pmu->read(); otherwise, the delta would be
> + * wrong when pmu->read is called for the first time.
> + */
> + local64_set(&hwc->prev_count, read_counter(hwc->idx));
> +}
> +
> +static inline void riscv_pmu_disable_event(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + int idx = hwc->idx;
> +
> + if (is_event_counter(idx))
> + write_event(idx, 0);
> +}
> +
> /*
> * pmu->read: read and update the counter
> *
> @@ -232,6 +356,7 @@ static void riscv_pmu_read(struct perf_event *event)
> */
> delta = (new_raw_count - prev_raw_count) &
> ((1ULL << riscv_pmu->counter_width) - 1);
> +
> local64_add(delta, &event->count);
> /*
> * Something like local64_sub(delta, &hwc->period_left) here is
> @@ -252,6 +377,11 @@ static void riscv_pmu_stop(struct perf_event *event, int flags)
> {
> struct hw_perf_event *hwc = &event->hw;
>
> + if (WARN_ON_ONCE(hwc->idx == -1))
> + return;
> +
> + riscv_pmu_disable_event(event);
> +
> WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
> hwc->state |= PERF_HES_STOPPED;
>
> @@ -271,6 +401,9 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
> if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
> return;
>
> + if (WARN_ON_ONCE(hwc->idx == -1))
> + return;
> +
> if (flags & PERF_EF_RELOAD) {
> WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
>
> @@ -281,14 +414,10 @@ static void riscv_pmu_start(struct perf_event *event, int flags)
> }
>
> hwc->state = 0;
> - perf_event_update_userpage(event);
>
> - /*
> - * Since we cannot write to counters, this serves as an initialization
> - * to the delta-mechanism in pmu->read(); otherwise, the delta would be
> - * wrong when pmu->read is called for the first time.
> - */
> - local64_set(&hwc->prev_count, read_counter(hwc->idx));
> + riscv_pmu_enable_event(event);
> +
> + perf_event_update_userpage(event);
> }
>
> /*
> @@ -298,21 +427,18 @@ static int riscv_pmu_add(struct perf_event *event, int flags)
> {
> struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> struct hw_perf_event *hwc = &event->hw;
> + int count_idx;
>
> if (cpuc->n_events == riscv_pmu->num_counters)
> return -ENOSPC;
>
> - /*
> - * We don't have general conunters, so no binding-event-to-counter
> - * process here.
> - *
> - * Indexing using hwc->config generally not works, since config may
> - * contain extra information, but here the only info we have in
> - * hwc->config is the event index.
> - */
> - hwc->idx = hwc->config;
> - cpuc->events[hwc->idx] = event;
> + count_idx = get_available_counter(event);
> + if (count_idx < 0)
> + return -ENOSPC;
> +
> cpuc->n_events++;
> + hwc->idx = count_idx;
> + cpuc->events[hwc->idx] = event;
>
> hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
>
> @@ -330,8 +456,10 @@ static void riscv_pmu_del(struct perf_event *event, int flags)
> struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> struct hw_perf_event *hwc = &event->hw;
>
> - cpuc->events[hwc->idx] = NULL;
> cpuc->n_events--;
> + __clear_bit(hwc->idx, &cpuc->used_cntr_mask);
> +
> + cpuc->events[hwc->idx] = NULL;
> riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
> perf_event_update_userpage(event);
> }
> @@ -385,6 +513,7 @@ static int riscv_event_init(struct perf_event *event)
> {
> struct perf_event_attr *attr = &event->attr;
> struct hw_perf_event *hwc = &event->hw;
> + unsigned long config_base = 0;
> int err;
> int code;
>
> @@ -406,11 +535,17 @@ static int riscv_event_init(struct perf_event *event)
> code = riscv_pmu->map_cache_event(attr->config);
> break;
> case PERF_TYPE_RAW:
> - return -EOPNOTSUPP;
> + code = attr->config;
> + break;
> default:
> return -ENOENT;
> }
>
> + if (is_base_counter(code))
> + config_base |= RISCV_PMU_TYPE_BASE;
> + else
> + config_base |= RISCV_PMU_TYPE_EVENT;
> +
> event->destroy = riscv_event_destroy;
> if (code < 0) {
> event->destroy(event);
> @@ -424,6 +559,7 @@ static int riscv_event_init(struct perf_event *event)
> * But since we don't have such support, later in pmu->add(), we just
> * use hwc->config as the index instead.
> */
> + hwc->config_base = config_base;
> hwc->config = code;
> hwc->idx = -1;
>
> --
> 2.27.0
>