RE: [PATCH V2 2/8] perf tools: rewrite perf mmap read for overwrite
From: Liang, Kan
Date: Mon Dec 18 2017 - 14:07:29 EST
> > -union perf_event *perf_mmap__read_backward(struct perf_mmap *map)
> > +union perf_event *perf_mmap__read_backward(struct perf_mmap *map,
> > + u64 *start, u64 end)
> > {
> > - u64 head, end;
> > - u64 start = map->prev;
> > -
> > - /*
> > - * Check if event was unmapped due to a POLLHUP/POLLERR.
> > - */
> > - if (!refcount_read(&map->refcnt))
> > - return NULL;
> > -
>
> Is removing this check safe?
It duplicated the check in perf_mmap__read_catchup.
I planned to remove one of them, but it looks like I accidentally removed both.
I will keep one in V3.
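Roughly like this (a sketch only; the exact placement may change in V3):

union perf_event *perf_mmap__read_backward(struct perf_mmap *map,
                                           u64 *start, u64 end)
{
        union perf_event *event;

        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!refcount_read(&map->refcnt))
                return NULL;

        event = perf_mmap__read(map, *start, end, &map->prev);
        *start = map->prev;

        return event;
}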
>
> > - head = perf_mmap__read_head(map);
> > - if (!head)
> > - return NULL;
> > + union perf_event *event = NULL;
> >
> > - /*
> > - * 'head' pointer starts from 0. Kernel minus sizeof(record) form
> > - * it each time when kernel writes to it, so in fact 'head' is
> > - * negative. 'end' pointer is made manually by adding the size of
> > - * the ring buffer to 'head' pointer, means the validate data can
> > - * read is the whole ring buffer. If 'end' is positive, the ring
> > - * buffer has not fully filled, so we must adjust 'end' to 0.
> > - *
> > - * However, since both 'head' and 'end' is unsigned, we can't
> > - * simply compare 'end' against 0. Here we compare '-head' and
> > - * the size of the ring buffer, where -head is the number of bytes
> > - * kernel write to the ring buffer.
> > - */
> > - if (-head < (u64)(map->mask + 1))
> > - end = 0;
> > - else
> > - end = head + map->mask + 1;
> > + event = perf_mmap__read(map, *start, end, &map->prev);
> > + *start = map->prev;
> >
> > - return perf_mmap__read(map, start, end, &map->prev);
> > + return event;
> > }
>
> perf_mmap__read_backward() and perf_mmap__read_forward() become
> asymmetrical, but their names are symmetrical.
>
>
> >
> > -void perf_mmap__read_catchup(struct perf_mmap *map)
> > +static int overwrite_rb_find_range(void *buf, int mask, u64 head,
> > + u64 *start, u64 *end);
> > +
> > +int perf_mmap__read_catchup(struct perf_mmap *map,
> > + bool overwrite,
> > + u64 *start, u64 *end,
> > + unsigned long *size)
> > {
> > - u64 head;
> > + u64 head = perf_mmap__read_head(map);
> > + u64 old = map->prev;
> > + unsigned char *data = map->base + page_size;
> >
> > - if (!refcount_read(&map->refcnt))
> > - return;
> > + *start = overwrite ? head : old;
> > + *end = overwrite ? old : head;
> >
> > - head = perf_mmap__read_head(map);
> > - map->prev = head;
> > + if (*start == *end)
> > + return 0;
> > +
> > + *size = *end - *start;
> > + if (*size > (unsigned long)(map->mask) + 1) {
> > + if (!overwrite) {
> > + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
> > +
> > + map->prev = head;
> > + perf_mmap__consume(map, overwrite);
> > + return 0;
> > + }
> > +
> > + /*
> > + * Backward ring buffer is full. We still have a chance to read
> > + * most of data from it.
> > + */
> > + if (overwrite_rb_find_range(data, map->mask, head, start, end))
> > + return -1;
> > + }
> > +
> > + return 1;
>
> Coding suggestion: this function returns two different values (1 and -1) on
> two failure paths. It should return 0 for success and -1 for failure.
>
For perf top, -1 is enough for failure.
But for perf_mmap__push, it looks like two failure paths are needed, doesn't it?
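To illustrate what I mean on the perf_mmap__push side (hypothetical caller
code, only a sketch; 'md' and 'overwrite' come from the caller):

        u64 start, end;
        unsigned long size;
        int rc;

        rc = perf_mmap__read_catchup(md, overwrite, &start, &end, &size);
        if (rc < 0)     /* overwrite_rb_find_range() failed: hard error */
                return -1;
        if (rc == 0)    /* nothing to read, or the overrun was consumed */
                return 0;
        /* rc == 1: [start, end) holds valid data, go on pushing it */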
> You totally redefine perf_mmap__read_catchup(). The original meaning of
> this function is to adjust md->prev so that a following perf_mmap__read() can
> read events one by one safely. Only the backward ring buffer needs catchup:
> the kernel's 'head' pointer keeps moving (backward), while md->prev stays
> unchanged. For the forward ring buffer, md->prev catches up each time we
> read from the ring buffer.
>
> Your patch totally changes its meaning. Now it means finding the
> available data in a ring buffer (reporting start and end). At least we
> need a better name.
Sure, I will introduce a new function to do it in V3.
> In addition, if I understand your code correctly, we
> don't need to report 'size' to the caller, because size is determined by
> start and end.
Sure, I will remove the 'size' in V3.
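The caller can always derive it from the window:

        unsigned long size = end - start;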
>
> In addition, since the concepts of backward and overwrite are now
> unified, we can avoid updating map->prev during one-by-one reading
> (because backward reading doesn't need a consume operation). I think we can
> decouple the updating of map->prev from these two basic read functions.
> This would make the operations on map->prev clearer. Right now the moving
> direction of map->prev is confusing for the backward ring buffer: it moves
> forward during one-by-one reading, and moves backward during block reading
> (in perf record)
For reading, I think both one-by-one reading and block reading move forward
in overwrite mode.
Reading starts from 'head' and ends at map->prev. No?
For one-by-one reading, yes, it doesn't need to update map->prev on each read.
But both need to update map->prev when the block is finished.
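Something like this for an overwrite block (a sketch based on the patch
below, where perf_mmap__read() advances *start and accepts a NULL prev):

        u64 head = perf_mmap__read_head(map);
        u64 start = head;       /* the read window begins at 'head' ... */
        u64 end = map->prev;    /* ... and ends at the old position     */
        union perf_event *event;

        while ((event = perf_mmap__read(map, &start, end, NULL))) {
                /* process event; 'start' moves forward, map->prev untouched */
        }

        map->prev = head;       /* sync once, when the block is finished */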
> and when syncing.
>
> Then the core of the two reading paths becomes uniform. Let's get rid of
> the duplication.
>
> Summarize:
>
> 1. Compute both start and end before reading, for both forward and
> backward (perf_mmap__read_catchup, but it needs a better name).
How about perf_mmap__read_init?
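i.e. something like this (a proposed signature only, not final):

        /*
         * Compute the [start, end) window of available data, for both
         * overwrite and non-overwrite mode. It replaces the current
         * perf_mmap__read_catchup().
         */
        int perf_mmap__read_init(struct perf_mmap *map, bool overwrite,
                                 u64 *start, u64 *end);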
>
> 2. Don't update map->prev in perf_mmap__read. Let perf_mmap__read()
> update the start pointer, so it becomes stateless and suits both backward
> and forward reading.
>
> 3. Update md->prev for forward reading. Merge it into
> perf_mmap__consume?
It looks like we have to pass the updated 'start' to perf_mmap__consume.
More interfaces like perf_evlist__mmap_read would need to be changed,
which would impact other tools that only support non-overwrite mode.
How about updating both 'md->prev' and 'start' in perf_mmap__read(),
as in the patch below?
It introduces a new perf_mmap__read_event to get rid of
perf_mmap__read_backward/forward.
perf_mmap__read_backward will be removed later.
perf_mmap__read_forward is kept, since other tools still use it.
perf_mmap__read_event has to update 'end' on each read in non-overwrite mode,
since the ring buffer is not paused.
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 484a394..74f33fd 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -22,16 +22,16 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *perf_mmap__read(struct perf_mmap *map,
- u64 start, u64 end, u64 *prev)
+ u64 *start, u64 end, u64 *prev)
{
unsigned char *data = map->base + page_size;
union perf_event *event = NULL;
- int diff = end - start;
+ int diff = end - *start;
if (diff >= (int)sizeof(event->header)) {
size_t size;
- event = (union perf_event *)&data[start & map->mask];
+ event = (union perf_event *)&data[*start & map->mask];
size = event->header.size;
if (size < sizeof(event->header) || diff < (int)size) {
@@ -43,8 +43,8 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
*/
- if ((start & map->mask) + size != ((start + size) & map->mask)) {
- unsigned int offset = start;
+ if ((*start & map->mask) + size != ((*start + size) & map->mask)) {
+ unsigned int offset = *start;
unsigned int len = min(sizeof(*event), size), cpy;
void *dst = map->event_copy;
@@ -59,12 +59,12 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
event = (union perf_event *)map->event_copy;
}
- start += size;
+ *start += size;
}
broken_event:
if (prev)
- *prev = start;
+ *prev = *start;
return event;
}
@@ -132,6 +107,42 @@ void perf_mmap__read_catchup(struct perf_mmap *map)
map->prev = head;
}
+
+/*
+ * Read event from ring buffer. Return one event for each call.
+ * Support both overwrite and non-overwrite mode.
+ * The start and end are only available for overwrite mode, which
+ * pauses the ring buffer.
+ *
+ * Usage:
+ * perf_mmap__read_init
+ * while(event = perf_mmap__read_event) {
+ * //process the event
+ * perf_mmap__consume
+ * }
+ * perf_mmap__read_done
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map,
+ bool overwrite,
+ u64 *start, u64 end)
+{
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!refcount_read(&map->refcnt))
+ return NULL;
+
+ if (!overwrite)
+ end = perf_mmap__read_head(map);
+
+ return perf_mmap__read(map, start, end, &map->prev);
+}
+
static bool perf_mmap__empty(struct perf_mmap *map)
{
return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
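A caller following the usage pattern in the comment above would then look
roughly like this (a sketch; perf_mmap__read_init/perf_mmap__read_done are
the V3 names proposed earlier, not part of this diff):

        u64 start, end;
        union perf_event *event;

        if (perf_mmap__read_init(md, overwrite, &start, &end) <= 0)
                return;

        while ((event = perf_mmap__read_event(md, overwrite, &start, end))) {
                /* process the event */
                perf_mmap__consume(md, overwrite);
        }

        perf_mmap__read_done(md);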
Thanks,
Kan