[PATCH 05/18] perf tools: Add documentation for perf.data on disk format

From: Arnaldo Carvalho de Melo
Date: Mon Jun 27 2016 - 17:02:24 EST


From: Andi Kleen <ak@xxxxxxxxxxxxxxx>

Add some documentation for the on disk format of perf.data. This is not
documenting the actual perf events -- which are documented in
perf_event.h -- but just the additional headers that perf record adds
around them when writing the data to disk.

Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Link: http://lkml.kernel.org/r/1466800885-12974-1-git-send-email-andi@xxxxxxxxxxxxxx
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
---
tools/perf/Documentation/perf-file-format.txt | 442 ++++++++++++++++++++++++++
1 file changed, 442 insertions(+)
create mode 100644 tools/perf/Documentation/perf-file-format.txt

diff --git a/tools/perf/Documentation/perf-file-format.txt b/tools/perf/Documentation/perf-file-format.txt
new file mode 100644
index 000000000000..fdc99fe6bbc3
--- /dev/null
+++ b/tools/perf/Documentation/perf-file-format.txt
@@ -0,0 +1,442 @@
+perf.data format
+
+Up to date as of v4.7
+
+This document describes the on-disk perf.data format, generated by perf record
+or perf inject and consumed by the other perf tools.
+
+On a high level perf.data contains the events generated by the PMUs, plus metadata.
+
+All fields are in native-endian of the machine that generated the perf.data.
+
+When perf is writing to a pipe it uses a special version of the file
+format that does not rely on seeking to adjust data offsets. This
+format is not described here. The pipe version can be converted to
+normal perf.data with perf inject.
+
+The file starts with a perf_header:
+
+struct perf_header {
+ char magic[8]; /* PERFILE2 */
+ uint64_t size; /* size of the header */
+ uint64_t attr_size; /* size of an attribute in attrs */
+ struct perf_file_section attrs;
+ struct perf_file_section data;
+ struct perf_file_section event_types;
+ uint64_t flags;
+ uint64_t flags1[3];
+};
+
+The magic number identifies the perf file and the version. Current perf versions
+use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1
+is not described here. The magic number also identifies the endianness. When
+the magic value is 64-bit byte-swapped compared to the expected value, the file
+is in non-native endian.
+
+A perf_file_section contains a pointer to another section of the perf file.
+The header contains three such pointers: for attributes, data and event types.
+
+struct perf_file_section {
+ uint64_t offset; /* offset from start of file */
+ uint64_t size; /* size of the section */
+};
+
+Flags section:
+
+The header is followed by different optional headers, described by the bits set
+in flags. Only headers for which the bit is set are included. Each header
+consists of a perf_file_section located after the initial header.
+The respective perf_file_section points to the data of the additional
+header and defines its size.
+
+Some headers consist of strings, which are defined like this:
+
+struct perf_header_string {
+ uint32_t len;
+ char string[len]; /* zero terminated */
+};
+
+Some headers consist of a sequence of strings, preceded by the number of strings:
+
+struct perf_header_string_list {
+ uint32_t nr;
+ struct perf_header_string strings[nr]; /* variable length records */
+};
+
+The bits are the flags bits in a 256 bit bitmap starting with
+flags. These define the valid bits:
+
+ HEADER_RESERVED = 0, /* always cleared */
+ HEADER_FIRST_FEATURE = 1,
+ HEADER_TRACING_DATA = 1,
+
+Describe me.
+
+ HEADER_BUILD_ID = 2,
+
+The header consists of a sequence of build_id_event records. The size of each
+record is defined by header.size (see perf_event.h). Each event defines an ELF
+build id for an executable file name for a pid. An ELF build id is a unique
+identifier assigned by the linker to an executable.
+
+struct build_id_event {
+ struct perf_event_header header;
+ pid_t pid;
+ uint8_t build_id[24];
+ char filename[header.size - offsetof(struct build_id_event, filename)];
+};
+
+ HEADER_HOSTNAME = 3,
+
+A perf_header_string with the hostname where the data was collected
+(uname -n)
+
+ HEADER_OSRELEASE = 4,
+
+A perf_header_string with the os release where the data was collected
+(uname -r)
+
+ HEADER_VERSION = 5,
+
+A perf_header_string with the perf user tool version where the
+data was collected. This is the same as the version of the source tree
+the perf tool was built from.
+
+ HEADER_ARCH = 6,
+
+A perf_header_string with the CPU architecture (uname -m)
+
+ HEADER_NRCPUS = 7,
+
+A structure defining the number of CPUs.
+
+struct nr_cpus {
+ uint32_t nr_cpus_online;
+ uint32_t nr_cpus_available; /* CPUs not yet onlined */
+};
+
+ HEADER_CPUDESC = 8,
+
+A perf_header_string with description of the CPU. On x86 this is the model name
+in /proc/cpuinfo
+
+ HEADER_CPUID = 9,
+
+A perf_header_string with the exact CPU type. On x86 this is
+vendor,family,model,stepping. For example: GenuineIntel,6,69,1
+
+ HEADER_TOTAL_MEM = 10,
+
+A uint64_t with the total memory in bytes.
+
+ HEADER_CMDLINE = 11,
+
+A perf_header_string with the perf command line used to collect the data.
+
+ HEADER_EVENT_DESC = 12,
+
+Another description of the perf_event_attrs, more detailed than header.attrs
+including IDs and names. See perf_event.h or the man page for a description
+of a struct perf_event_attr.
+
+struct {
+ uint32_t nr; /* number of events */
+ uint32_t attr_size; /* size of each perf_event_attr */
+ struct {
+ struct perf_event_attr attr; /* size of attr_size */
+ uint32_t nr_ids;
+ struct perf_header_string event_string;
+ uint64_t ids[nr_ids];
+ } events[nr]; /* Variable length records */
+};
+
+ HEADER_CPU_TOPOLOGY = 13,
+
+String lists defining the core and CPU threads topology.
+
+struct {
+ struct perf_header_string_list cores; /* Variable length */
+ struct perf_header_string_list threads; /* Variable length */
+};
+
+Example:
+ sibling cores : 0-3
+ sibling threads : 0-1
+ sibling threads : 2-3
+
+ HEADER_NUMA_TOPOLOGY = 14,
+
+ A list of NUMA node descriptions
+
+struct {
+ uint32_t nr;
+ struct {
+ uint32_t nodenr;
+ uint64_t mem_total;
+ uint64_t mem_free;
+ struct perf_header_string cpus;
+ } nodes[nr]; /* Variable length records */
+};
+
+ HEADER_BRANCH_STACK = 15,
+
+Not implemented in perf.
+
+ HEADER_PMU_MAPPINGS = 16,
+
+ A list of PMU structures, defining the different PMUs supported by perf.
+
+struct {
+ uint32_t nr;
+ struct pmu {
+ uint32_t pmu_type;
+ struct perf_header_string pmu_name;
+ } [nr]; /* Variable length records */
+};
+
+ HEADER_GROUP_DESC = 17,
+
+ Description of counter groups ({...} in perf syntax)
+
+struct {
+ uint32_t nr;
+ struct {
+ struct perf_header_string string;
+ uint32_t leader_idx;
+ uint32_t nr_members;
+ } [nr]; /* Variable length records */
+};
+
+ HEADER_AUXTRACE = 18,
+
+Define additional auxtrace areas in the perf.data. auxtrace is used to store
+undecoded hardware tracing information, such as Intel Processor Trace data.
+
+/**
+ * struct auxtrace_index_entry - indexes an AUX area tracing event within a
+ * perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+ u64 file_offset;
+ u64 sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ * file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+ struct list_head list;
+ size_t nr;
+ struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
+ other bits are reserved and should be ignored for now
+ HEADER_FEAT_BITS = 256,
+
+Attributes
+
+This is an array of perf_event_attrs, each attr_size bytes long, which defines
+each event collected. See perf_event.h or the man page for a detailed
+description.
+
+Data
+
+This section is the bulk of the file. It consists of a stream of perf_events
+describing events. This matches the format generated by the kernel.
+See perf_event.h or the manpage for a detailed description.
+
+Some notes on parsing:
+
+Ordering
+
+The events are not necessarily in time stamp order, as they can be
+collected in parallel on different CPUs. If the events should be
+processed in time order they need to be sorted first. It is possible
+to only do a partial sort using the FINISHED_ROUND event header (see
+below). perf record guarantees that there is no reordering over a
+FINISHED_ROUND.
+
+ID vs IDENTIFIER
+
+When the event stream contains multiple events each event is identified
+by an ID. This can be either through the PERF_SAMPLE_ID or the
+PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
+at a fixed offset from the event header, which allows reliable
+parsing of the header. Relying on ID may be ambiguous.
+IDENTIFIER is only supported by newer Linux kernels.
+
+Perf record specific events:
+
+In addition to the kernel generated event types perf record adds its
+own event types (in addition it also synthesizes some kernel events,
+for example MMAP events)
+
+ PERF_RECORD_USER_TYPE_START = 64,
+ PERF_RECORD_HEADER_ATTR = 64,
+
+struct attr_event {
+ struct perf_event_header header;
+ struct perf_event_attr attr;
+ uint64_t id[];
+};
+
+ PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
+
+#define MAX_EVENT_NAME 64
+
+struct perf_trace_event_type {
+ uint64_t event_id;
+ char name[MAX_EVENT_NAME];
+};
+
+struct event_type_event {
+ struct perf_event_header header;
+ struct perf_trace_event_type event_type;
+};
+
+
+ PERF_RECORD_HEADER_TRACING_DATA = 66,
+
+Describe me
+
+struct tracing_data_event {
+ struct perf_event_header header;
+ uint32_t size;
+};
+
+ PERF_RECORD_HEADER_BUILD_ID = 67,
+
+Define an ELF build ID for a referenced executable.
+
+ struct build_id_event; /* See above */
+
+ PERF_RECORD_FINISHED_ROUND = 68,
+
+No event reordering over this header. No payload.
+
+ PERF_RECORD_ID_INDEX = 69,
+
+Map event ids to CPUs and TIDs.
+
+struct id_index_entry {
+ uint64_t id;
+ uint64_t idx;
+ uint64_t cpu;
+ uint64_t tid;
+};
+
+struct id_index_event {
+ struct perf_event_header header;
+ uint64_t nr;
+ struct id_index_entry entries[nr];
+};
+
+ PERF_RECORD_AUXTRACE_INFO = 70,
+
+Auxtrace type specific information. Describe me
+
+struct auxtrace_info_event {
+ struct perf_event_header header;
+ uint32_t type;
+ uint32_t reserved__; /* For alignment */
+ uint64_t priv[];
+};
+
+ PERF_RECORD_AUXTRACE = 71,
+
+Defines auxtrace data. Followed by the actual data. The contents of
+the auxtrace data is dependent on the event and the CPU. For example
+for Intel Processor Trace it contains Processor Trace data generated
+by the CPU.
+
+struct auxtrace_event {
+ struct perf_event_header header;
+ uint64_t size;
+ uint64_t offset;
+ uint64_t reference;
+ uint32_t idx;
+ uint32_t tid;
+ uint32_t cpu;
+ uint32_t reserved__; /* For alignment */
+};
+
+struct aux_event {
+ struct perf_event_header header;
+ uint64_t aux_offset;
+ uint64_t aux_size;
+ uint64_t flags;
+};
+
+ PERF_RECORD_AUXTRACE_ERROR = 72,
+
+Describes an error in hardware tracing
+
+enum auxtrace_error_type {
+ PERF_AUXTRACE_ERROR_ITRACE = 1,
+ PERF_AUXTRACE_ERROR_MAX
+};
+
+#define MAX_AUXTRACE_ERROR_MSG 64
+
+struct auxtrace_error_event {
+ struct perf_event_header header;
+ uint32_t type;
+ uint32_t code;
+ uint32_t cpu;
+ uint32_t pid;
+ uint32_t tid;
+ uint32_t reserved__; /* For alignment */
+ uint64_t ip;
+ char msg[MAX_AUXTRACE_ERROR_MSG];
+};
+
+Event types
+
+Define the event attributes with their IDs.
+
+An array bound by the perf_file_section size.
+
+ struct {
+ struct perf_event_attr attr; /* Size defined by header.attr_size */
+ struct perf_file_section ids;
+ }
+
+ids points to an array of uint64_t defining the ids for event attr attr.
+
+References:
+
+include/uapi/linux/perf_event.h
+
+This is the canonical description of the kernel generated perf_events
+and the perf_event_attrs.
+
+perf_events manpage
+
+A manpage describing perf_event and perf_event_attr is here:
+http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html
+This tends to be slightly behind the kernel include, but has better
+descriptions. A (typically older) version of the man page may be
+included with the standard Linux man pages, available with "man
+perf_events"
+
+pmu-tools
+
+https://github.com/andikleen/pmu-tools/tree/master/parser
+
+A definition of the perf.data format in python "construct" format is available
+in pmu-tools parser. This allows reading perf.data from python and dumping it.
+
+quipper
+
+The quipper C++ parser is available at
+https://chromium.googlesource.com/chromiumos/platform/chromiumos-wide-profiling/
+Unfortunately this parser tends to be many versions behind and may not be able
+to parse data files generated by recent perf.
--
2.7.4