Re: [PATCH V7 02/25] perf evlist: Add initial support for mmapping an AUX area buffer

From: Jiri Olsa
Date: Wed Apr 01 2015 - 07:50:16 EST


On Tue, Mar 31, 2015 at 02:38:31PM +0300, Adrian Hunter wrote:
> This patch anticipates the addition to the kernel
> of an "aux" buffer that can be mmapped separately
> from the perf-events buffer.
>
> The expectation is that this buffer can be configured
> to contain hardware-produced trace information.
> The first implementation will support Intel BTS and
> Intel PT.
>
> One auxtrace buffer is mmapped per perf-events buffer.
> If the requested auxtrace buffer size is zero, which
> it will be until further support is added, then
> no auxtrace mmapping is attempted.
>
> Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx>

Acked-by: Jiri Olsa <jolsa@xxxxxxxxxx>

> ---
> tools/perf/util/Build | 1 +
> tools/perf/util/auxtrace.c | 108 +++++++++++++++++++++++++++++++++++++++++++++
> tools/perf/util/auxtrace.h | 97 ++++++++++++++++++++++++++++++++++++++++
> tools/perf/util/evlist.c | 61 +++++++++++++++++++++++--
> tools/perf/util/evlist.h | 5 +++
> 5 files changed, 269 insertions(+), 3 deletions(-)
> create mode 100644 tools/perf/util/auxtrace.c
> create mode 100644 tools/perf/util/auxtrace.h
>
> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> index 797490a..b6c3f39 100644
> --- a/tools/perf/util/Build
> +++ b/tools/perf/util/Build
> @@ -74,6 +74,7 @@ libperf-y += data.o
> libperf-$(CONFIG_X86) += tsc.o
> libperf-y += cloexec.o
> libperf-y += thread-stack.o
> +libperf-y += auxtrace.o
>
> libperf-$(CONFIG_LIBELF) += symbol-elf.o
> libperf-$(CONFIG_LIBELF) += probe-event.o
> diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
> new file mode 100644
> index 0000000..75419a5
> --- /dev/null
> +++ b/tools/perf/util/auxtrace.c
> @@ -0,0 +1,108 @@
> +/*
> + * auxtrace.c: AUX area trace support
> + * Copyright (c) 2013-2015, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#include <sys/types.h>
> +#include <sys/mman.h>
> +#include <stdbool.h>
> +
> +#include <linux/kernel.h>
> +#include <linux/perf_event.h>
> +#include <linux/types.h>
> +#include <linux/bitops.h>
> +#include <linux/log2.h>
> +
> +#include "../perf.h"
> +#include "util.h"
> +#include "evlist.h"
> +#include "cpumap.h"
> +#include "thread_map.h"
> +#include "asm/bug.h"
> +#include "auxtrace.h"
> +
> +int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
> + struct auxtrace_mmap_params *mp,
> + void *userpg, int fd)
> +{
> +#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
> + pr_err("Cannot use AUX area tracing mmaps\n");
> + return -1;
> +#endif
> +
> + WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n");
> +
> + mm->userpg = userpg;
> + mm->mask = mp->mask;
> + mm->len = mp->len;
> + mm->prev = 0;
> + mm->idx = mp->idx;
> + mm->tid = mp->tid;
> + mm->cpu = mp->cpu;
> +
> + if (!mp->len) {
> + mm->base = NULL;
> + return 0;
> + }
> +
> + mm->base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset);
> + if (mm->base == MAP_FAILED) {
> + pr_debug2("failed to mmap AUX area\n");
> + mm->base = NULL;
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +void auxtrace_mmap__munmap(struct auxtrace_mmap *mm)
> +{
> + if (mm->base) {
> + munmap(mm->base, mm->len);
> + mm->base = NULL;
> + }
> +}
> +
> +void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
> + off_t auxtrace_offset,
> + unsigned int auxtrace_pages,
> + bool auxtrace_overwrite)
> +{
> + if (auxtrace_pages) {
> + mp->offset = auxtrace_offset;
> + mp->len = auxtrace_pages * (size_t)page_size;
> + mp->mask = is_power_of_2(mp->len) ? mp->len - 1 : 0;
> + mp->prot = PROT_READ | (auxtrace_overwrite ? 0 : PROT_WRITE);
> + pr_debug2("AUX area mmap length %zu\n", mp->len);
> + } else {
> + mp->len = 0;
> + }
> +}
> +
> +void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
> + struct perf_evlist *evlist, int idx,
> + bool per_cpu)
> +{
> + mp->idx = idx;
> +
> + if (per_cpu) {
> + mp->cpu = evlist->cpus->map[idx];
> + if (evlist->threads)
> + mp->tid = evlist->threads->map[0];
> + else
> + mp->tid = -1;
> + } else {
> + mp->cpu = -1;
> + mp->tid = evlist->threads->map[idx];
> + }
> +}
> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
> new file mode 100644
> index 0000000..735ca2a
> --- /dev/null
> +++ b/tools/perf/util/auxtrace.h
> @@ -0,0 +1,97 @@
> +/*
> + * auxtrace.h: AUX area trace support
> + * Copyright (c) 2013-2015, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#ifndef __PERF_AUXTRACE_H
> +#define __PERF_AUXTRACE_H
> +
> +#include <sys/types.h>
> +#include <stdbool.h>
> +
> +#include <linux/perf_event.h>
> +#include <linux/types.h>
> +
> +#include "../perf.h"
> +
> +struct perf_evlist;
> +
> +/**
> + * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
> + * @base: address of mapped area
> + * @userpg: pointer to buffer's perf_event_mmap_page
> + * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
> + * @len: size of mapped area
> + * @prev: previous aux_head
> + * @idx: index of this mmap
> + * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
> + * mmap) otherwise %-1
> + * @cpu: cpu number for a per-cpu mmap otherwise %-1
> + */
> +struct auxtrace_mmap {
> + void *base;
> + void *userpg;
> + size_t mask;
> + size_t len;
> + u64 prev;
> + int idx;
> + pid_t tid;
> + int cpu;
> +};
> +
> +/**
> + * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap.
> + * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
> + * @offset: file offset of mapped area
> + * @len: size of mapped area
> + * @prot: mmap memory protection
> + * @idx: index of this mmap
> + * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
> + * mmap) otherwise %-1
> + * @cpu: cpu number for a per-cpu mmap otherwise %-1
> + */
> +struct auxtrace_mmap_params {
> + size_t mask;
> + off_t offset;
> + size_t len;
> + int prot;
> + int idx;
> + pid_t tid;
> + int cpu;
> +};
> +
> +static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm __maybe_unused)
> +{
> + /* Not yet implemented */
> + return 0;
> +}
> +
> +static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm __maybe_unused,
> + u64 tail __maybe_unused)
> +{
> + /* Not yet implemented */
> +}
> +
> +int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
> + struct auxtrace_mmap_params *mp,
> + void *userpg, int fd);
> +void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
> +void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
> + off_t auxtrace_offset,
> + unsigned int auxtrace_pages,
> + bool auxtrace_overwrite);
> +void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
> + struct perf_evlist *evlist, int idx,
> + bool per_cpu);
> +
> +#endif
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 82bf224..3481265 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -725,6 +725,34 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
> perf_evlist__mmap_put(evlist, idx);
> }
>
> +int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
> + struct auxtrace_mmap_params *mp __maybe_unused,
> + void *userpg __maybe_unused,
> + int fd __maybe_unused)
> +{
> + return 0;
> +}
> +
> +void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
> +{
> +}
> +
> +void __weak auxtrace_mmap_params__init(
> + struct auxtrace_mmap_params *mp __maybe_unused,
> + off_t auxtrace_offset __maybe_unused,
> + unsigned int auxtrace_pages __maybe_unused,
> + bool auxtrace_overwrite __maybe_unused)
> +{
> +}
> +
> +void __weak auxtrace_mmap_params__set_idx(
> + struct auxtrace_mmap_params *mp __maybe_unused,
> + struct perf_evlist *evlist __maybe_unused,
> + int idx __maybe_unused,
> + bool per_cpu __maybe_unused)
> +{
> +}
> +
> static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
> {
> if (evlist->mmap[idx].base != NULL) {
> @@ -732,6 +760,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
> evlist->mmap[idx].base = NULL;
> evlist->mmap[idx].refcnt = 0;
> }
> + auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
> }
>
> void perf_evlist__munmap(struct perf_evlist *evlist)
> @@ -759,6 +788,7 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
> struct mmap_params {
> int prot;
> int mask;
> + struct auxtrace_mmap_params auxtrace_mp;
> };
>
> static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
> @@ -789,6 +819,10 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
> return -1;
> }
>
> + if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
> + &mp->auxtrace_mp, evlist->mmap[idx].base, fd))
> + return -1;
> +
> return 0;
> }
>
> @@ -853,6 +887,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
> for (cpu = 0; cpu < nr_cpus; cpu++) {
> int output = -1;
>
> + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
> + true);
> +
> for (thread = 0; thread < nr_threads; thread++) {
> if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
> thread, &output))
> @@ -878,6 +915,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
> for (thread = 0; thread < nr_threads; thread++) {
> int output = -1;
>
> + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
> + false);
> +
> if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
> &output))
> goto out_unmap;
> @@ -981,19 +1021,25 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
> }
>
> /**
> - * perf_evlist__mmap - Create mmaps to receive events.
> + * perf_evlist__mmap_ex - Create mmaps to receive events.
> * @evlist: list of events
> * @pages: map length in pages
> * @overwrite: overwrite older events?
> + * @auxtrace_pages: auxtrace map length in pages
> + * @auxtrace_overwrite: overwrite older auxtrace data?
> *
> * If @overwrite is %false the user needs to signal event consumption using
> * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this
> * automatically.
> *
> + * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
> + * consumption using auxtrace_mmap__write_tail().
> + *
> * Return: %0 on success, negative error code otherwise.
> */
> -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
> - bool overwrite)
> +int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
> + bool overwrite, unsigned int auxtrace_pages,
> + bool auxtrace_overwrite)
> {
> struct perf_evsel *evsel;
> const struct cpu_map *cpus = evlist->cpus;
> @@ -1013,6 +1059,9 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
> pr_debug("mmap size %zuB\n", evlist->mmap_len);
> mp.mask = evlist->mmap_len - page_size - 1;
>
> + auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
> + auxtrace_pages, auxtrace_overwrite);
> +
> evlist__for_each(evlist, evsel) {
> if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
> evsel->sample_id == NULL &&
> @@ -1026,6 +1075,12 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
> return perf_evlist__mmap_per_cpu(evlist, &mp);
> }
>
> +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
> + bool overwrite)
> +{
> + return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
> +}
> +
> int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
> {
> evlist->threads = thread_map__new_str(target->pid, target->tid,
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index fb19c47..beed571 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -8,6 +8,7 @@
> #include "event.h"
> #include "evsel.h"
> #include "util.h"
> +#include "auxtrace.h"
> #include <unistd.h>
>
> struct pollfd;
> @@ -28,6 +29,7 @@ struct perf_mmap {
> int mask;
> int refcnt;
> unsigned int prev;
> + struct auxtrace_mmap auxtrace_mmap;
> char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
> };
>
> @@ -126,6 +128,9 @@ int perf_evlist__parse_mmap_pages(const struct option *opt,
> const char *str,
> int unset);
>
> +int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
> + bool overwrite, unsigned int auxtrace_pages,
> + bool auxtrace_overwrite);
> int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
> bool overwrite);
> void perf_evlist__munmap(struct perf_evlist *evlist);
> --
> 1.9.1
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/