Re: [PATCH v4 05/10] riscv: Prepare for user-space perf event mmap support

From: Atish Patra
Date: Fri Jul 14 2023 - 04:05:31 EST


On Mon, Jul 3, 2023 at 5:51 AM Alexandre Ghiti <alexghiti@xxxxxxxxxxxx> wrote:
>
> Provide all the necessary bits in the generic RISC-V PMU driver to be
> able to mmap perf events in userspace: the heavy lifting lies in the
> driver backends, namely the legacy and SBI implementations.
>
> Note that arch_perf_update_userpage() is almost a copy of the arm64 code.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@xxxxxxxxxxxx>
> Reviewed-by: Andrew Jones <ajones@xxxxxxxxxxxxxxxx>
> ---
> drivers/perf/riscv_pmu.c | 105 +++++++++++++++++++++++++++++++++
> include/linux/perf/riscv_pmu.h | 4 ++
> 2 files changed, 109 insertions(+)
>
> diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
> index ebca5eab9c9b..432ad2e80ce3 100644
> --- a/drivers/perf/riscv_pmu.c
> +++ b/drivers/perf/riscv_pmu.c
> @@ -14,9 +14,73 @@
> #include <linux/perf/riscv_pmu.h>
> #include <linux/printk.h>
> #include <linux/smp.h>
> +#include <linux/sched_clock.h>
>
> #include <asm/sbi.h>
>
> +static bool riscv_perf_user_access(struct perf_event *event)
> +{
> + return ((event->attr.type == PERF_TYPE_HARDWARE) ||
> + (event->attr.type == PERF_TYPE_HW_CACHE) ||
> + (event->attr.type == PERF_TYPE_RAW)) &&
> + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
> +}
> +
> +void arch_perf_update_userpage(struct perf_event *event,
> + struct perf_event_mmap_page *userpg, u64 now)
> +{
> + struct clock_read_data *rd;
> + unsigned int seq;
> + u64 ns;
> +
> + userpg->cap_user_time = 0;
> + userpg->cap_user_time_zero = 0;
> + userpg->cap_user_time_short = 0;
> + userpg->cap_user_rdpmc = riscv_perf_user_access(event);
> +
> + userpg->pmc_width = 64;
> +
> + do {
> + rd = sched_clock_read_begin(&seq);
> +
> + userpg->time_mult = rd->mult;
> + userpg->time_shift = rd->shift;
> + userpg->time_zero = rd->epoch_ns;
> + userpg->time_cycles = rd->epoch_cyc;
> + userpg->time_mask = rd->sched_clock_mask;
> +
> + /*
> + * Subtract the cycle base, such that software that
> + * doesn't know about cap_user_time_short still 'works'
> + * assuming no wraps.
> + */
> + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
> + userpg->time_zero -= ns;
> +
> + } while (sched_clock_read_retry(seq));
> +
> + userpg->time_offset = userpg->time_zero - now;
> +
> + /*
> + * time_shift is not expected to be greater than 31 due to
> + * the original published conversion algorithm shifting a
> + * 32-bit value (now specifies a 64-bit value) - refer to the
> + * perf_event_mmap_page documentation in perf_event.h.
> + */
> + if (userpg->time_shift == 32) {
> + userpg->time_shift = 31;
> + userpg->time_mult >>= 1;
> + }
> +
> + /*
> + * Internal timekeeping for enabled/running/stopped times
> + * is always computed with the sched_clock.
> + */
> + userpg->cap_user_time = 1;
> + userpg->cap_user_time_zero = 1;
> + userpg->cap_user_time_short = 1;
> +}
> +
> static unsigned long csr_read_num(int csr_num)
> {
> #define switchcase_csr_read(__csr_num, __val) {\
> @@ -171,6 +235,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
>
> local64_set(&hwc->prev_count, (u64)-left);
>
> + perf_event_update_userpage(event);
> +
> return overflow;
> }
>
> @@ -267,6 +333,9 @@ static int riscv_pmu_event_init(struct perf_event *event)
> hwc->idx = -1;
> hwc->event_base = mapped_event;
>
> + if (rvpmu->event_init)
> + rvpmu->event_init(event);
> +
> if (!is_sampling_event(event)) {
> /*
> * For non-sampling runs, limit the sample_period to half
> @@ -283,6 +352,39 @@ static int riscv_pmu_event_init(struct perf_event *event)
> return 0;
> }
>
> +static int riscv_pmu_event_idx(struct perf_event *event)
> +{
> + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +
> + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
> + return 0;
> +
> + if (rvpmu->csr_index)
> + return rvpmu->csr_index(event) + 1;
> +
> + return 0;
> +}
> +
> +static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
> +{
> + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +
> + if (rvpmu->event_mapped) {
> + rvpmu->event_mapped(event, mm);
> + perf_event_update_userpage(event);
> + }
> +}
> +
> +static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
> +{
> + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +
> + if (rvpmu->event_unmapped) {
> + rvpmu->event_unmapped(event, mm);
> + perf_event_update_userpage(event);
> + }
> +}
> +
> struct riscv_pmu *riscv_pmu_alloc(void)
> {
> struct riscv_pmu *pmu;
> @@ -307,6 +409,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
> }
> pmu->pmu = (struct pmu) {
> .event_init = riscv_pmu_event_init,
> + .event_mapped = riscv_pmu_event_mapped,
> + .event_unmapped = riscv_pmu_event_unmapped,
> + .event_idx = riscv_pmu_event_idx,
> .add = riscv_pmu_add,
> .del = riscv_pmu_del,
> .start = riscv_pmu_start,
> diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> index 5deeea0be7cb..43282e22ebe1 100644
> --- a/include/linux/perf/riscv_pmu.h
> +++ b/include/linux/perf/riscv_pmu.h
> @@ -55,6 +55,10 @@ struct riscv_pmu {
> void (*ctr_start)(struct perf_event *event, u64 init_val);
> void (*ctr_stop)(struct perf_event *event, unsigned long flag);
> int (*event_map)(struct perf_event *event, u64 *config);
> + void (*event_init)(struct perf_event *event);
> + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm);
> + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm);
> + uint8_t (*csr_index)(struct perf_event *event);
>
> struct cpu_hw_events __percpu *hw_events;
> struct hlist_node node;
> --
> 2.39.2
>

Reviewed-by: Atish Patra <atishp@xxxxxxxxxxxx>

--
Regards,
Atish