[PATCH] tools/perf: Update the perf_event_attr portion of design.txt

From: Wang Xiaoqiang
Date: Sat Jan 16 2016 - 02:56:38 EST


Update things as follows:
* update the members of perf_event_attr
* remove the out of date explanation about 'config' bit-fields
* add 'type' field explanation
* also update the related enums - perf_type_id, perf_hw_id, and
perf_sw_ids

Signed-off-by: Wang Xiaoqiang <wangxq10@xxxxxxxxxx>
---
tools/perf/design.txt | 256 ++++++++++++++++++++++++++++++++------------------
1 file changed, 167 insertions(+), 89 deletions(-)

diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index a28dca2..45576f5 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -35,77 +35,139 @@ can be poll()ed.
When creating a new counter fd, 'perf_event_attr' is:

struct perf_event_attr {
- /*
- * The MSB of the config word signifies if the rest contains cpu
- * specific (raw) counter configuration data, if unset, the next
- * 7 bits are an event type and the rest of the bits are the event
- * identifier.
- */
- __u64 config;
-
- __u64 irq_period;
- __u32 record_type;
- __u32 read_format;
-
- __u64 disabled : 1, /* off by default */
- inherit : 1, /* children inherit it */
- pinned : 1, /* must always be on PMU */
- exclusive : 1, /* only group on PMU */
- exclude_user : 1, /* don't count user */
- exclude_kernel : 1, /* ditto kernel */
- exclude_hv : 1, /* ditto hypervisor */
- exclude_idle : 1, /* don't count when idle */
- mmap : 1, /* include mmap data */
- munmap : 1, /* include munmap data */
- comm : 1, /* include comm data */
-
- __reserved_1 : 52;
-
- __u32 extra_config_len;
- __u32 wakeup_events; /* wakeup every n events */
-
- __u64 __reserved_2;
- __u64 __reserved_3;
-};
-
-The 'config' field specifies what the counter should count. It
-is divided into 3 bit-fields:
-
-raw_type: 1 bit (most significant bit) 0x8000_0000_0000_0000
-type: 7 bits (next most significant) 0x7f00_0000_0000_0000
-event_id: 56 bits (least significant) 0x00ff_ffff_ffff_ffff

-If 'raw_type' is 1, then the counter will count a hardware event
-specified by the remaining 63 bits of event_config. The encoding is
-machine-specific.
+ /*
+ * Major type: hardware/software/tracepoint/etc.
+ */
+ __u32 type;
+
+ /*
+ * Size of the attr structure, for fwd/bwd compat.
+ */
+ __u32 size;
+
+ /*
+ * Type specific configuration information.
+ */
+ __u64 config;
+
+ union {
+ __u64 sample_period;
+ __u64 sample_freq;
+ };
+
+ __u64 sample_type;
+ __u64 read_format;
+
+ __u64 disabled : 1, /* off by default */
+ inherit : 1, /* children inherit it */
+ pinned : 1, /* must always be on PMU */
+ exclusive : 1, /* only group on PMU */
+ exclude_user : 1, /* don't count user */
+ exclude_kernel : 1, /* ditto kernel */
+ exclude_hv : 1, /* ditto hypervisor */
+ exclude_idle : 1, /* don't count when idle */
+ mmap : 1, /* include mmap data */
+ comm : 1, /* include comm data */
+ freq : 1, /* use freq, not period */
+ inherit_stat : 1, /* per task counts */
+ enable_on_exec : 1, /* next exec enables */
+ task : 1, /* trace fork/exit */
+ watermark : 1, /* wakeup_watermark */
+ /*
+ * precise_ip:
+ *
+ * 0 - SAMPLE_IP can have arbitrary skid
+ * 1 - SAMPLE_IP must have constant skid
+ * 2 - SAMPLE_IP requested to have 0 skid
+ * 3 - SAMPLE_IP must have 0 skid
+ *
+ * See also PERF_RECORD_MISC_EXACT_IP
+ */
+ precise_ip : 2, /* skid constraint */
+ mmap_data : 1, /* non-exec mmap data */
+ sample_id_all : 1, /* sample_type all events */
+
+ exclude_host : 1, /* don't count in host */
+ exclude_guest : 1, /* don't count in guest */
+
+ exclude_callchain_kernel : 1, /* exclude kernel callchains */
+ exclude_callchain_user : 1, /* exclude user callchains */
+ mmap2 : 1, /* include mmap with inode data */
+ comm_exec : 1, /* flag comm events that are due to an exec */
+
+ __reserved_1 : 39;
+
+ union {
+ __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_watermark; /* bytes before wakeup */
+ };
+
+ __u32 bp_type;
+ union {
+ __u64 bp_addr;
+ __u64 config1; /* extension of config */
+ };
+ union {
+ __u64 bp_len;
+ __u64 config2; /* extension of config1 */
+ };
+ __u64 branch_sample_type; /* enum perf_branch_sample_type */
+
+ /*
+ * Defines set of user regs to dump on samples.
+ * See asm/perf_regs.h for details.
+ */
+ __u64 sample_regs_user;
+
+ /*
+ * Defines size of the user stack to dump on samples.
+ */
+ __u32 sample_stack_user;
+ /* Align to u64. */
+ __u32 __reserved_2;
+};

-If 'raw_type' is 0, then the 'type' field says what kind of counter
-this is, with the following encoding:
+The 'type' field says what kind of counter this is, with the
+following encoding:

enum perf_type_id {
- PERF_TYPE_HARDWARE = 0,
- PERF_TYPE_SOFTWARE = 1,
- PERF_TYPE_TRACEPOINT = 2,
+ PERF_TYPE_HARDWARE = 0,
+ PERF_TYPE_SOFTWARE = 1,
+ PERF_TYPE_TRACEPOINT = 2,
+ PERF_TYPE_HW_CACHE = 3,
+ PERF_TYPE_RAW = 4,
+ PERF_TYPE_BREAKPOINT = 5,
};

+The 'config' field specifies what the counter should count, with more
+event details, such as PERF_COUNT_HW_CACHE_MISSES in PERF_TYPE_HARDWARE,
+which counts the cache-misses event in hardware events.
+
A counter of PERF_TYPE_HARDWARE will count the hardware event
-specified by 'event_id':
+specified by 'config' field:

/*
- * Generalized performance counter event types, used by the hw_event.event_id
- * parameter of the sys_perf_event_open() syscall:
+ * Generalized performance event event_id types, used by the
+ * attr.config parameter of the sys_perf_event_open()
+ * syscall:
*/
enum perf_hw_id {
- /*
- * Common hardware events, generalized by the kernel:
- */
- PERF_COUNT_HW_CPU_CYCLES = 0,
- PERF_COUNT_HW_INSTRUCTIONS = 1,
- PERF_COUNT_HW_CACHE_REFERENCES = 2,
- PERF_COUNT_HW_CACHE_MISSES = 3,
- PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
- PERF_COUNT_HW_BRANCH_MISSES = 5,
- PERF_COUNT_HW_BUS_CYCLES = 6,
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_HW_CPU_CYCLES = 0,
+ PERF_COUNT_HW_INSTRUCTIONS = 1,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_MISSES = 3,
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BUS_CYCLES = 6,
+ PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
+ PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
+ PERF_COUNT_HW_REF_CPU_CYCLES = 9,
+
+ PERF_COUNT_HW_MAX, /* non-ABI */
};

These are standardized types of events that work relatively uniformly
@@ -118,27 +180,30 @@ will return -EINVAL.
More hw_event_types are supported as well, but they are CPU-specific
and accessed as raw events. For example, to count "External bus
cycles while bus lock signal asserted" events on Intel Core CPUs, pass
-in a 0x4064 event_id value and set hw_event.raw_type to 1.
+in a 0x4064 config value and set attr.type to PERF_TYPE_RAW.

A counter of type PERF_TYPE_SOFTWARE will count one of the available
software events, selected by 'event_id':

/*
- * Special "software" counters provided by the kernel, even if the hardware
- * does not support performance counters. These counters measure various
+ * Special "software" events provided by the kernel, even if the hardware
+ * does not support performance events. These events measure various
* physical and sw events of the kernel (and allow the profiling of them as
* well):
*/
enum perf_sw_ids {
- PERF_COUNT_SW_CPU_CLOCK = 0,
- PERF_COUNT_SW_TASK_CLOCK = 1,
- PERF_COUNT_SW_PAGE_FAULTS = 2,
- PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
- PERF_COUNT_SW_CPU_MIGRATIONS = 4,
- PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
- PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
- PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
- PERF_COUNT_SW_EMULATION_FAULTS = 8,
+ PERF_COUNT_SW_CPU_CLOCK = 0,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+ PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+ PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
+ PERF_COUNT_SW_DUMMY = 9,
+
+ PERF_COUNT_SW_MAX, /* non-ABI */
};

Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
--
2.1.4