Re: [PATCH v2 1/4] perf: Allow using AUX data in perf samples

From: Peter Zijlstra
Date: Thu Oct 24 2019 - 10:06:32 EST


On Tue, Oct 22, 2019 at 12:58:09PM +0300, Alexander Shishkin wrote:
> @@ -964,6 +979,7 @@ struct perf_sample_data {
> u32 reserved;
> } cpu_entry;
> struct perf_callchain_entry *callchain;
> + u64 aux_size;
>
> /*
> * regs_user may point to task_pt_regs or to regs_user_copy, depending
> @@ -996,6 +1012,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
> data->weight = 0;
> data->data_src.val = PERF_MEM_NA;
> data->txn = 0;
> + data->aux_size = 0;
> }
>

I don't see the need to initialize data->aux_size in perf_sample_data_init(),
because the prepare path sets it unconditionally:

> +static unsigned long perf_aux_sample_size(struct perf_event *event,
> + struct perf_sample_data *data,
> + size_t size)
> +{
> + struct perf_event *sampler = event->aux_event;
> + struct ring_buffer *rb;
> +
> + data->aux_size = 0;
> +
> + if (!sampler)
> + goto out;
> +
> + if (WARN_ON_ONCE(READ_ONCE(sampler->state) != PERF_EVENT_STATE_ACTIVE))
> + goto out;
> +
> + if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id()))
> + goto out;
> +
> + rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
> + if (!rb)
> + goto out;
> +
> + /*
> + * If this is an NMI hit inside sampling code, don't take
> + * the sample. See also perf_aux_sample_output().
> + */
> + if (READ_ONCE(rb->aux_in_sampling)) {
> + data->aux_size = 0;
> + } else {
> + size = min_t(size_t, size, perf_aux_size(rb));
> + data->aux_size = ALIGN(size, sizeof(u64));
> + }
> + ring_buffer_put(rb);
> +
> +out:
> + return data->aux_size;
> +}

... whenever PERF_SAMPLE_AUX is set, which is the only case in which it is called:

> @@ -6699,6 +6824,35 @@ void perf_prepare_sample(struct perf_event_header *header,
>
> if (sample_type & PERF_SAMPLE_PHYS_ADDR)
> data->phys_addr = perf_virt_to_phys(data->addr);
> +
> + if (sample_type & PERF_SAMPLE_AUX) {
> + u64 size;
> +
> + header->size += sizeof(u64); /* size */
> +
> + /*
> + * Given the 16bit nature of header::size, an AUX sample can
> + * easily overflow it, what with all the preceding sample bits.
> + * Make sure this doesn't happen by using up to U16_MAX bytes
> + * per sample in total (rounded down to 8 byte boundary).
> + */
> + size = min_t(size_t, U16_MAX - header->size,
> + event->attr.aux_sample_size);
> + size = rounddown(size, 8);
> + size = perf_aux_sample_size(event, data, size);
> +
> + WARN_ON_ONCE(size + header->size > U16_MAX);
> + header->size += size;
> + }
> + /*
> + * If you're adding more sample types here, you likely need to do
> + * something about the overflowing header::size, like repurpose the
> + * lowest 3 bits of size, which should be always zero at the moment.
> + * This raises a more important question, do we really need 512k sized
> + * samples and why, so good argumentation is in order for whatever you
> + * do here next.
> + */
> + WARN_ON_ONCE(header->size & 7);
> }

And the output path only looks at data->aux_size when PERF_SAMPLE_AUX is set:

> +static void perf_aux_sample_output(struct perf_event *event,
> + struct perf_output_handle *handle,
> + struct perf_sample_data *data)
> +{
> + struct perf_event *sampler = event->aux_event;
> + unsigned long pad;
> + struct ring_buffer *rb;
> + long size;
> +
> + if (WARN_ON_ONCE(!sampler || !data->aux_size))
> + return;
> +
> + rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler);
> + if (!rb)
> + return;
> +
> + /*
> + * Guard against NMI hits inside the critical section;
> + * see also perf_aux_sample_size().
> + */
> + WRITE_ONCE(rb->aux_in_sampling, 1);
> +
> + size = perf_pmu_aux_sample_output(sampler, handle, data->aux_size);
> +
> + /*
> + * An error here means that perf_output_copy() failed (returned a
> + * non-zero surplus that it didn't copy), which in its current
> + * enlightened implementation is not possible. If that changes, we'd
> + * like to know.
> + */
> + if (WARN_ON_ONCE(size < 0))
> + goto out_clear;
> +
> + /*
> + * The pad comes from ALIGN()ing data->aux_size up to u64 in
> + * perf_aux_sample_size(), so should not be more than that.
> + */
> + pad = data->aux_size - size;
> + if (WARN_ON_ONCE(pad >= sizeof(u64)))
> + pad = 8;
> +
> + if (pad) {
> + u64 zero = 0;
> + perf_output_copy(handle, &zero, pad);
> + }
> +
> +out_clear:
> + WRITE_ONCE(rb->aux_in_sampling, 0);
> +
> + ring_buffer_put(rb);
> +}
> +

> @@ -6511,6 +6629,13 @@ void perf_output_sample(struct perf_output_handle *handle,
> if (sample_type & PERF_SAMPLE_PHYS_ADDR)
> perf_output_put(handle, data->phys_addr);
>
> + if (sample_type & PERF_SAMPLE_AUX) {
> + perf_output_put(handle, data->aux_size);
> +
> + if (data->aux_size)
> + perf_aux_sample_output(event, handle, data);
> + }
> +
> if (!event->attr.watermark) {
> int wakeup_events = event->attr.wakeup_events;


So, afaict, you can simply remove the line in perf_sample_data_init().