Re: [PATCH 01/10] perf record: new interfaces to read ring buffer to file

From: Arnaldo Carvalho de Melo
Date: Tue Oct 10 2017 - 14:30:26 EST


Em Tue, Oct 10, 2017 at 10:20:14AM -0700, kan.liang@xxxxxxxxx escreveu:
> From: Kan Liang <kan.liang@xxxxxxxxx>
>
> Factor out the mmap_read related codes into separate functions and move
> them to evlist.c
> struct perf_mmap_read is introduced to store the specific range of ring
> buffer information.
>
> The way to read specific range of ring buffer into file is as below,
> perf_mmap__read_init
> perf_mmap__read_to_file
> perf_mmap__consume
>
> There is no functional change.
>
> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>
> ---
> tools/perf/builtin-record.c | 111 ++++++++------------------------------------
> tools/perf/util/evlist.c | 111 ++++++++++++++++++++++++++++++++++++++++++++
> tools/perf/util/evlist.h | 15 ++++++
> 3 files changed, 146 insertions(+), 91 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 234fdf4..df555e9 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -105,6 +105,17 @@ static bool switch_output_time(struct record *rec)
> trigger_is_ready(&switch_output_trigger);
> }
>
> +static void update_output_size(struct record *rec, size_t size)
> +{
> + if (size == 0)
> + return;
> +
> + rec->bytes_written += size;
> +
> + if (switch_output_size(rec))
> + trigger_hit(&switch_output_trigger);
> +}
> +
> static int record__write(struct record *rec, void *bf, size_t size)
> {
> if (perf_data_file__write(rec->session->file, bf, size) < 0) {
> @@ -112,10 +123,7 @@ static int record__write(struct record *rec, void *bf, size_t size)
> return -1;
> }
>
> - rec->bytes_written += size;
> -
> - if (switch_output_size(rec))
> - trigger_hit(&switch_output_trigger);
> + update_output_size(rec, size);
>
> return 0;
> }
> @@ -130,106 +138,27 @@ static int process_synthesized_event(struct perf_tool *tool,
> }
>
> static int
> -backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
> -{
> - struct perf_event_header *pheader;
> - u64 evt_head = head;
> - int size = mask + 1;
> -
> - pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
> - pheader = (struct perf_event_header *)(buf + (head & mask));
> - *start = head;
> - while (true) {
> - if (evt_head - head >= (unsigned int)size) {
> - pr_debug("Finished reading backward ring buffer: rewind\n");
> - if (evt_head - head > (unsigned int)size)
> - evt_head -= pheader->size;
> - *end = evt_head;
> - return 0;
> - }
> -
> - pheader = (struct perf_event_header *)(buf + (evt_head & mask));
> -
> - if (pheader->size == 0) {
> - pr_debug("Finished reading backward ring buffer: get start\n");
> - *end = evt_head;
> - return 0;
> - }
> -
> - evt_head += pheader->size;
> - pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
> - }
> - WARN_ONCE(1, "Shouldn't get here\n");
> - return -1;
> -}
> -
> -static int
> -rb_find_range(void *data, int mask, u64 head, u64 old,
> - u64 *start, u64 *end, bool backward)
> -{
> - if (!backward) {
> - *start = old;
> - *end = head;
> - return 0;
> - }
> -
> - return backward_rb_find_range(data, mask, head, start, end);
> -}
> -
> -static int
> record__mmap_read(struct record *rec, struct perf_mmap *md,
> bool overwrite, bool backward)
> {
> - u64 head = perf_mmap__read_head(md);
> - u64 old = md->prev;
> - u64 end = head, start = old;
> - unsigned char *data = md->base + page_size;
> - unsigned long size;
> - void *buf;
> - int rc = 0;
> + struct perf_mmap_read read;
>
> - if (rb_find_range(data, md->mask, head,
> - old, &start, &end, backward))
> + if (perf_mmap__read_init(md, &read, overwrite, backward))
> return -1;
>
> - if (start == end)
> + if (read.start == read.end)
> return 0;
>
> rec->samples++;
>
> - size = end - start;
> - if (size > (unsigned long)(md->mask) + 1) {
> - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
> -
> - md->prev = head;
> - perf_mmap__consume(md, overwrite || backward);
> - return 0;
> - }
> -
> - if ((start & md->mask) + size != (end & md->mask)) {
> - buf = &data[start & md->mask];
> - size = md->mask + 1 - (start & md->mask);
> - start += size;
> -
> - if (record__write(rec, buf, size) < 0) {
> - rc = -1;
> - goto out;
> - }
> - }
> + if (perf_mmap__read_to_file(&read, rec->session->file) < 0)
> + return -1;
>
> - buf = &data[start & md->mask];
> - size = end - start;
> - start += size;
> + update_output_size(rec, read.size);
>
> - if (record__write(rec, buf, size) < 0) {
> - rc = -1;
> - goto out;
> - }
> -
> - md->prev = head;
> perf_mmap__consume(md, overwrite || backward);
> -out:
> - return rc;
> +
> + return 0;
> }
>
> static volatile int done;
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 6a0d7ff..33b8837 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -704,6 +704,117 @@ static int perf_evlist__resume(struct perf_evlist *evlist)
> return perf_evlist__set_paused(evlist, false);
> }
>
> +static int
> +backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)
> +{
> + struct perf_event_header *pheader;
> + u64 evt_head = head;
> + int size = mask + 1;
> +
> + pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);
> + pheader = (struct perf_event_header *)(buf + (head & mask));
> + *start = head;
> + while (true) {
> + if (evt_head - head >= (unsigned int)size) {
> + pr_debug("Finished reading backward ring buffer: rewind\n");
> + if (evt_head - head > (unsigned int)size)
> + evt_head -= pheader->size;
> + *end = evt_head;
> + return 0;
> + }
> +
> + pheader = (struct perf_event_header *)(buf + (evt_head & mask));
> +
> + if (pheader->size == 0) {
> + pr_debug("Finished reading backward ring buffer: get start\n");
> + *end = evt_head;
> + return 0;
> + }
> +
> + evt_head += pheader->size;
> + pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
> + }
> + WARN_ONCE(1, "Shouldn't get here\n");
> + return -1;
> +}
> +
> +static int
> +rb_find_range(void *data, int mask, u64 head, u64 old,
> + u64 *start, u64 *end, bool backward)
> +{
> + if (!backward) {
> + *start = old;
> + *end = head;
> + return 0;
> + }
> +
> + return backward_rb_find_range(data, mask, head, start, end);
> +}
> +
> +/*
> + * Initialize struct perf_mmap_read
> + * Calculate the range of ring buffer to read
> + */
> +int perf_mmap__read_init(struct perf_mmap *md, struct perf_mmap_read *read,
> + bool overwrite, bool backward)
> +{
> + unsigned char *data = md->base + page_size;
> +
> + read->md = md;
> + read->head = perf_mmap__read_head(md);
> + read->overwrite = overwrite;
> + read->backward = backward;
> + read->size = 0;
> + return rb_find_range(data, md->mask, read->head, md->prev,
> + &read->start, &read->end, backward);
> +}
> +
> +/*
> + * Read the ring buffer in the range which specified in struct perf_mmap_read,
> + * and write to file.
> + * Used by perf record.
> + */
> +int perf_mmap__read_to_file(struct perf_mmap_read *read,
> + struct perf_data_file *file)
> +{
> + struct perf_mmap *md = read->md;
> + unsigned char *data = md->base + page_size;
> + unsigned long size;
> + void *buf;
> +
> + size = read->end - read->start;
> + if (size > (unsigned long)(md->mask) + 1) {
> + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
> + read->size = 0;
> + goto out;
> + }
> +
> + if ((read->start & md->mask) + size != (read->end & md->mask)) {
> + buf = &data[read->start & md->mask];
> + size = md->mask + 1 - (read->start & md->mask);
> + read->start += size;
> + read->size += size;
> +
> + if (perf_data_file__write(file, buf, size) < 0) {
> + pr_err("failed to write %s, error: %m\n", file->path);
> + return -1;
> + }
> + }
> +
> + buf = &data[read->start & md->mask];
> + size = read->end - read->start;
> + read->start += size;
> + read->size += size;
> +
> + if (perf_data_file__write(file, buf, size) < 0) {
> + pr_err("failed to write %s, error: %m\n", file->path);
> + return -1;
> + }
> +out:
> + md->prev = read->head;
> + return 0;
> +}
> +
> /* When check_messup is true, 'end' must points to a good entry */
> static union perf_event *
> perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index b1c14f1..1ce4857 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -39,6 +39,16 @@ struct perf_mmap {
> char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
> };
>
> +struct perf_mmap_read {
> + struct perf_mmap *md;
> + u64 head;
> + u64 start;
> + u64 end;

So there will be always a one-on-one association of 'struct
perf_mmap_read' and 'struct perf_mmap', why not go on adding more fields
to 'struct perf_mmap' as we need, but not doing it all at once
(backward, snapshotting, overwrite, etc) but first the simple part, make
the most basic mode:

perf record -a

perf top

work, multithreaded, leaving the other more complicated modes
fallbacking to the old format, then when we have it solid, go on getting
the other features.

In the end, having the two formats supported will be needed anyway, and
we can as well ask for processing with both perf.data file formats to
compare results, while we strenghten out the new code.

I just think we should do this in a more fine grained way to avoid too
much code churn as well as having a fallback to the old code, that
albeit non scalable, is what we have been using and can help in
certifying that the new one works well, by comparing its outputs.

- Arnaldo

> + bool overwrite;
> + bool backward;
> + unsigned long size;
> +};
> +
> static inline size_t
> perf_mmap__mmap_len(struct perf_mmap *map)
> {
> @@ -193,6 +203,11 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
>
> void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
>
> +int perf_mmap__read_init(struct perf_mmap *md, struct perf_mmap_read *read,
> + bool overwrite, bool backward);
> +int perf_mmap__read_to_file(struct perf_mmap_read *read,
> + struct perf_data_file *file);
> +
> int perf_evlist__open(struct perf_evlist *evlist);
> void perf_evlist__close(struct perf_evlist *evlist);
>
> --
> 2.5.5