Re: [PATCH V6 2/3] perf: Extend perf_output_read
From: Liang, Kan
Date: Thu Dec 19 2024 - 19:42:59 EST
On 2024-12-19 5:21 p.m., Peter Zijlstra wrote:
> On Wed, Dec 18, 2024 at 07:16:42AM -0800, kan.liang@xxxxxxxxxxxxxxx wrote:
>> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>>
>> The event may have been updated in the PMU-specific implementation,
>> e.g., Intel PEBS counters snapshotting. The common code should not
>> read and overwrite the value.
>>
>> The PERF_SAMPLE_READ bit in data->sample_flags can be used to detect
>> whether the PMU-specific value is available. If it is set, avoid the
>> pmu->read() in the common code.
>
> I had a poke at this, and ended up with the patch below. I'm not sure
> about it, though; what do you think?
It looks good to me. I will do more tests tomorrow and send a V7.
Thanks,
Kan
>
> ---
> include/linux/perf_event.h | 8 +++++++-
> kernel/events/core.c | 33 ++++++++++++++++-----------------
> kernel/events/ring_buffer.c | 1 +
> 3 files changed, 24 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 8333f132f4a9..582f517a5dc8 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1062,7 +1062,13 @@ struct perf_output_handle {
> struct perf_buffer *rb;
> unsigned long wakeup;
> unsigned long size;
> - u64 aux_flags;
> + union {
> + u64 flags; /* perf_output*() */
> + u64 aux_flags; /* perf_aux_output*() */
> + struct {
> + u64 skip_read : 1;
> + };
> + };
> union {
> void *addr;
> unsigned long head;
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index b2bc67791f84..f91ba29048ce 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -1191,6 +1191,12 @@ static void perf_assert_pmu_disabled(struct pmu *pmu)
> WARN_ON_ONCE(*this_cpu_ptr(pmu->pmu_disable_count) == 0);
> }
>
> +static inline void perf_pmu_read(struct perf_event *event)
> +{
> + if (event->state == PERF_EVENT_STATE_ACTIVE)
> + event->pmu->read(event);
> +}
> +
> static void get_ctx(struct perf_event_context *ctx)
> {
> refcount_inc(&ctx->refcount);
> @@ -3473,8 +3479,7 @@ static void __perf_event_sync_stat(struct perf_event *event,
> * we know the event must be on the current CPU, therefore we
> * don't need to use it.
> */
> - if (event->state == PERF_EVENT_STATE_ACTIVE)
> - event->pmu->read(event);
> + perf_pmu_read(event);
>
> perf_event_update_time(event);
>
> @@ -4618,15 +4623,8 @@ static void __perf_event_read(void *info)
>
> pmu->read(event);
>
> - for_each_sibling_event(sub, event) {
> - if (sub->state == PERF_EVENT_STATE_ACTIVE) {
> - /*
> - * Use sibling's PMU rather than @event's since
> - * sibling could be on different (eg: software) PMU.
> - */
> - sub->pmu->read(sub);
> - }
> - }
> + for_each_sibling_event(sub, event)
> + perf_pmu_read(sub);
>
> data->ret = pmu->commit_txn(pmu);
>
> @@ -7400,9 +7398,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
> values[n++] = running;
>
> - if ((leader != event) &&
> - (leader->state == PERF_EVENT_STATE_ACTIVE))
> - leader->pmu->read(leader);
> + if ((leader != event) && !handle->skip_read)
> + perf_pmu_read(leader);
>
> values[n++] = perf_event_count(leader, self);
> if (read_format & PERF_FORMAT_ID)
> @@ -7415,9 +7412,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
> for_each_sibling_event(sub, leader) {
> n = 0;
>
> - if ((sub != event) &&
> - (sub->state == PERF_EVENT_STATE_ACTIVE))
> - sub->pmu->read(sub);
> + if ((sub != event) && !handle->skip_read)
> + perf_pmu_read(sub);
>
> values[n++] = perf_event_count(sub, self);
> if (read_format & PERF_FORMAT_ID)
> @@ -7476,6 +7472,9 @@ void perf_output_sample(struct perf_output_handle *handle,
> {
> u64 sample_type = data->type;
>
> + if (data->sample_flags & PERF_SAMPLE_READ)
> + handle->skip_read = 1;
> +
> perf_output_put(handle, *header);
>
> if (sample_type & PERF_SAMPLE_IDENTIFIER)
> diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
> index 4f46f688d0d4..9b49ecca693e 100644
> --- a/kernel/events/ring_buffer.c
> +++ b/kernel/events/ring_buffer.c
> @@ -185,6 +185,7 @@ __perf_output_begin(struct perf_output_handle *handle,
>
> handle->rb = rb;
> handle->event = event;
> + handle->flags = 0;
>
> have_lost = local_read(&rb->lost);
> if (unlikely(have_lost)) {
>