Re: [PATCH v9 3/3]: perf record: extend trace writing to multi AIO

From: Namhyung Kim
Date: Fri Oct 05 2018 - 03:22:55 EST


On Wed, Oct 03, 2018 at 07:17:01PM +0300, Alexey Budankov wrote:
>
> Multi AIO trace writing allows caching more kernel data into userspace
> memory postponing trace writing for the sake of overall profiling data
> thruput increase. It could be seen as kernel data buffer extension into
> userspace memory.
>
> With aio-cblocks option value different from 0, current default value,
> tool has capability to cache more and more data into user space
> along with delegating spill to AIO.
>
> That allows avoiding suspend at record__aio_sync() between calls of
> record__mmap_read_evlist() and increase profiling data thruput for
> the cost of userspace memory.
>
> Signed-off-by: Alexey Budankov <alexey.budankov@xxxxxxxxxxxxxxx>
> ---
> tools/perf/builtin-record.c | 59 +++++++++++++++++++++++++++------
> tools/perf/util/evlist.c | 7 ++--
> tools/perf/util/evlist.h | 3 +-
> tools/perf/util/mmap.c | 79 ++++++++++++++++++++++++++++++++-------------
> tools/perf/util/mmap.h | 7 ++--
> 5 files changed, 115 insertions(+), 40 deletions(-)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index c63fe8c021a2..7b67574be5b7 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -196,16 +196,35 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
> return rc;
> }
>
> -static void record__aio_sync(struct perf_mmap *md)
> +static int record__aio_sync(struct perf_mmap *md, bool sync_all)
> {
> - struct aiocb *cblock = &md->cblock;
> + struct aiocb **aiocb = md->aiocb;
> + struct aiocb *cblocks = md->cblocks;
> struct timespec timeout = { 0, 1000 * 1000 * 1 }; // 1ms
> + int i, do_suspend;
>
> do {
> - if (cblock->aio_fildes == -1 || record__aio_complete(md, cblock))
> - return;
> + do_suspend = 0;
> + for (i = 0; i < md->nr_cblocks; ++i) {
> + if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
> + if (sync_all)
> + aiocb[i] = NULL;
> + else
> + return i;
> + } else {
> + /*
> + * Started aio write is not complete yet
> + * so it has to be waited before the
> + * next allocation.
> + */
> + aiocb[i] = &cblocks[i];
> + do_suspend = 1;
> + }
> + }
> + if (!do_suspend)
> + return -1;
>
> - while (aio_suspend((const struct aiocb**)&cblock, 1, &timeout)) {
> + while (aio_suspend((const struct aiocb **)aiocb, md->nr_cblocks, &timeout)) {
> if (!(errno == EAGAIN || errno == EINTR))
> pr_err("failed to sync perf data, error: %m\n");
> }
> @@ -436,11 +455,15 @@ static int record__mmap_evlist(struct record *rec,
> struct perf_evlist *evlist)
> {
> struct record_opts *opts = &rec->opts;
> + int nr_cblocks = 0;
> char msg[512];
> -
> +#ifdef HAVE_AIO_SUPPORT
> + nr_cblocks = opts->nr_cblocks;
> +#endif
> if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
> opts->auxtrace_mmap_pages,
> - opts->auxtrace_snapshot_mode) < 0) {
> + opts->auxtrace_snapshot_mode,
> + nr_cblocks) < 0) {
> if (errno == EPERM) {
> pr_err("Permission error mapping pages.\n"
> "Consider increasing "
> @@ -637,7 +660,7 @@ static void record__mmap_read_sync(struct record *rec __maybe_unused)
> for (i = 0; i < evlist->nr_mmaps; i++) {
> struct perf_mmap *map = &maps[i];
> if (map->base)
> - record__aio_sync(map);
> + record__aio_sync(map, true);
> }
> #endif
> }
> @@ -676,12 +699,13 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
> goto out;
> }
> } else {
> + int idx;
> /*
> * Call record__aio_sync() to wait till map->data buffer
> * becomes available after previous aio write request.
> */
> - record__aio_sync(map);
> - if (perf_mmap__aio_push(map, rec, record__aio_pushfn) != 0) {
> + idx = record__aio_sync(map, false);
> + if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn) != 0) {
> rc = -1;
> goto out;
> }
> @@ -1442,6 +1466,10 @@ static int perf_record_config(const char *var, const char *value, void *cb)
> var = "call-graph.record-mode";
> return perf_default_config(var, value, cb);
> }
> +#ifdef HAVE_AIO_SUPPORT
> + if (!strcmp(var, "record.aio-cblocks"))
> + rec->opts.nr_cblocks = strtol(value, NULL, 0);
> +#endif
>
> return 0;
> }
> @@ -1833,6 +1861,10 @@ static struct option __record_options[] = {
> "signal"),
> OPT_BOOLEAN(0, "dry-run", &dry_run,
> "Parse options then exit"),
> +#ifdef HAVE_AIO_SUPPORT
> + OPT_INTEGER(0, "aio-cblocks", &record.opts.nr_cblocks,
> + "Max number of simultaneous per-mmap trace writes (default: 0 - serial, max: 4)"),
> +#endif
> OPT_END()
> };

Please update the documentation for the new option and config
variable. Perhaps it can provide the option always and check the
availability at runtime (with a warning message if not enabled).

Thanks,
Namhyung


>
> @@ -2025,6 +2057,13 @@ int cmd_record(int argc, const char **argv)
> goto out;
> }
>
> +#ifdef HAVE_AIO_SUPPORT
> + if (rec->opts.nr_cblocks > 4)
> + rec->opts.nr_cblocks = 4;
> + if (verbose > 0)
> + pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
> +#endif
> +
> err = __cmd_record(&record, argc, argv);
> out:
> perf_evlist__delete(rec->evlist);