[PATCH v1 2/2]: perf record: enable asynchronous trace writing

From: Alexey Budankov
Date: Thu Aug 23 2018 - 06:32:05 EST



Trace file offsets are different for every enqueued write operation
and calculated dynamically in trace streaming loop and don't overlap
so write requests can be written in parallel.

record__mmap_read_sync implements sort of a barrier between spilling
ready profiling data to disk.

Signed-off-by: Alexey Budankov <alexey.budankov@xxxxxxxxxxxxxxx>
---
tools/perf/builtin-record.c | 97 +++++++++++++++++++++++++++++++++++++++++----
tools/perf/util/mmap.c | 23 +++++------
tools/perf/util/mmap.h | 2 +-
3 files changed, 101 insertions(+), 21 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a35675e9f3aa..dee63229ed37 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -132,7 +132,7 @@ static int record__aio_write(int trace_fd, struct aiocb *cblock,
cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

if (aio_write(cblock) == -1) {
- pr_err("failed to write perf data, error: %m\n");
+ pr_err("failed to queue perf data, error: %m\n");
return -1;
}

@@ -148,12 +148,14 @@ static int process_synthesized_event(struct perf_tool *tool,
return record__write(rec, event, event->header.size);
}

-static int record__pushfn(void *to, void *bf, size_t size)
+static int record__pushfn(void *to, void *bf, size_t size, off_t off)
{
struct record *rec = to;
+ struct perf_mmap *map = bf;

rec->samples++;
- return record__write(rec, bf, size);
+ return record__aio_write(rec->session->data->file.fd, &map->cblock,
+ map->data, size, off);
}

static volatile int done;
@@ -528,13 +530,85 @@ static struct perf_event_header finished_round_event = {
.type = PERF_RECORD_FINISHED_ROUND,
};

+static int record__mmap_read_sync(int trace_fd, struct aiocb **cblocks,
+ int cblocks_size, struct record *rec)
+{
+ size_t rem;
+ ssize_t size;
+ off_t rem_off;
+ int i, aio_ret, aio_errno, do_suspend;
+ struct perf_mmap *md;
+ struct timespec timeout0 = { 0, 0 };
+ struct timespec timeoutS = { 0, 1000 * 500 * 1 };
+
+ if (!cblocks_size)
+ return 0;
+
+ do {
+ do_suspend = 0;
+ nanosleep(&timeoutS, NULL);
+ if (aio_suspend((const struct aiocb**)cblocks, cblocks_size, &timeout0)) {
+ if (errno == EAGAIN || errno == EINTR) {
+ do_suspend = 1;
+ continue;
+ } else {
+ pr_err("failed to sync perf data, error: %m\n");
+ break;
+ }
+ }
+ for (i = 0; i < cblocks_size; i++) {
+ if (cblocks[i] == NULL) {
+ continue;
+ }
+ aio_errno = aio_error(cblocks[i]);
+ if (aio_errno == EINPROGRESS) {
+ do_suspend = 1;
+ continue;
+ }
+ size = aio_ret = aio_return((struct aiocb*)cblocks[i]);
+ if (aio_ret < 0) {
+ if (aio_errno == EINTR) {
+ size = 0;
+ } else {
+ pr_err("failed to write perf data, error: %m\n");
+ cblocks[i] = NULL;
+ continue;
+ }
+ }
+ rec->bytes_written += size;
+ md = (struct perf_mmap*)((char*)cblocks[i] -
+ offsetof(struct perf_mmap, cblock));
+ rem = cblocks[i]->aio_nbytes - (size_t)size;
+ if (rem == 0) {
+ perf_mmap__put(md);
+ cblocks[i] = NULL;
+ if (switch_output_size(rec))
+ trigger_hit(&switch_output_trigger);
+ } else {
+ rem_off = cblocks[i]->aio_offset +
+ cblocks[i]->aio_nbytes - rem;
+ if (!record__aio_write(trace_fd,
+ (struct aiocb *)cblocks[i],
+ md->data + cblocks[i]->aio_nbytes - rem,
+ rem, rem_off))
+ do_suspend = 1;
+ }
+ }
+ } while (do_suspend);
+
+ return 0;
+}
+
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
bool overwrite)
{
u64 bytes_written = rec->bytes_written;
- int i;
- int rc = 0;
+ int i, rc = 0;
struct perf_mmap *maps;
+ int trace_fd = rec->session->data->file.fd;
+ struct aiocb **mmap_aio = rec->evlist->mmap_aio;
+ int mmap_aio_size = 0;
+ off_t off;

if (!evlist)
return 0;
@@ -546,14 +620,17 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
return 0;

+ off = lseek(trace_fd, 0, SEEK_CUR);
+
for (i = 0; i < evlist->nr_mmaps; i++) {
struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;

if (maps[i].base) {
- if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
- rc = -1;
+ rc = perf_mmap__push(&maps[i], rec, record__pushfn, &off);
+ if (rc < 0)
goto out;
- }
+ else if (rc > 0)
+ mmap_aio[mmap_aio_size++] = &maps[i].cblock;
}

if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
@@ -563,6 +640,10 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
}
}

+ record__mmap_read_sync(trace_fd, mmap_aio, mmap_aio_size, rec);
+
+ lseek(trace_fd, off, SEEK_SET);
+
/*
* Mark the round finished in case we wrote
* at least one event.
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index e71d46cb01cc..c8b921c88a5d 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -292,11 +292,11 @@ int perf_mmap__read_init(struct perf_mmap *map)
}

int perf_mmap__push(struct perf_mmap *md, void *to,
- int push(void *to, void *buf, size_t size))
+ int push(void *to, void *buf, size_t size, off_t), off_t *off)
{
u64 head = perf_mmap__read_head(md);
unsigned char *data = md->base + page_size;
- unsigned long size;
+ unsigned long size, size0 = 0;
void *buf;
int rc = 0;

@@ -308,23 +308,22 @@ int perf_mmap__push(struct perf_mmap *md, void *to,

if ((md->start & md->mask) + size != (md->end & md->mask)) {
buf = &data[md->start & md->mask];
- size = md->mask + 1 - (md->start & md->mask);
- md->start += size;
-
- if (push(to, buf, size) < 0) {
- rc = -1;
- goto out;
- }
+ size0 = md->mask + 1 - (md->start & md->mask);
+ md->start += size0;
+ memcpy(md->data, buf, size0);
}

buf = &data[md->start & md->mask];
size = md->end - md->start;
md->start += size;
+ memcpy(md->data + size0, buf, size);

- if (push(to, buf, size) < 0) {
- rc = -1;
+ rc = push(to, md, size0 + size, *off) < 0 ? -1 : 1;
+ if (rc == -1)
goto out;
- }
+
+ perf_mmap__get(md);
+ *off += size0 + size;

md->prev = head;
perf_mmap__consume(md);
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 1974e621e36b..6211a3a0c4c3 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -95,7 +95,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
union perf_event *perf_mmap__read_event(struct perf_mmap *map);

int perf_mmap__push(struct perf_mmap *md, void *to,
- int push(void *to, void *buf, size_t size));
+ int push(void *to, void *buf, size_t size, off_t off), off_t *off);

size_t perf_mmap__mmap_len(struct perf_mmap *map);