[PATCH v1 5/5] perf/x86/intel/pt: Add support for address range filtering in PT
From: Alexander Shishkin
Date: Thu Apr 21 2016 - 11:30:54 EST
Newer versions of Intel PT support address ranges, which can be used to
define IP address range-based filters or TraceSTOP regions. Number of
ranges in enumerated via cpuid.
This patch implements pmu callbacks and related low-level code to allow
filter validation, configuration and programming into the hardware.
Signed-off-by: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
---
arch/x86/events/intel/pt.c | 183 ++++++++++++++++++++++++++++++++++++++++++++-
arch/x86/events/intel/pt.h | 28 +++++++
2 files changed, 210 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 891447dd61..e577afb74d 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -253,6 +253,73 @@ static bool pt_event_valid(struct perf_event *event)
* These all are cpu affine and operate on a local PT
*/
+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+ unsigned long msr_a;
+ unsigned long msr_b;
+ unsigned int reg_off;
+} pt_address_ranges[] = {
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR0_A,
+ .msr_b = MSR_IA32_RTIT_ADDR0_B,
+ .reg_off = RTIT_CTL_ADDR0_OFFSET,
+ },
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR1_A,
+ .msr_b = MSR_IA32_RTIT_ADDR1_B,
+ .reg_off = RTIT_CTL_ADDR1_OFFSET,
+ },
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR2_A,
+ .msr_b = MSR_IA32_RTIT_ADDR2_B,
+ .reg_off = RTIT_CTL_ADDR2_OFFSET,
+ },
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR3_A,
+ .msr_b = MSR_IA32_RTIT_ADDR3_B,
+ .reg_off = RTIT_CTL_ADDR3_OFFSET,
+ }
+};
+
+static u64 pt_config_filters(struct perf_event *event)
+{
+ struct pt_filters *filters = event->hw.addr_filters;
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ unsigned int range = 0;
+ u64 rtit_ctl = 0;
+
+ if (!filters)
+ return 0;
+
+ for (range = 0; range < filters->nr_filters; range++) {
+ struct pt_filter *filter = &filters->filter[range];
+
+ /*
+ * Note, if the range has zero start/end addresses due
+ * to its dynamic object not being loaded yet, we just
+ * go ahead and program zeroed range, which will simply
+ * produce no data. Note^2: if executable code at 0x0
+ * is a concern, we can set up an "invalid" configuration
+ * such as msr_b < msr_a.
+ */
+
+ /* avoid redundant msr writes */
+ if (pt->filters.filter[range].msr_a != filter->msr_a) {
+ wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+ pt->filters.filter[range].msr_a = filter->msr_a;
+ }
+
+ if (pt->filters.filter[range].msr_b != filter->msr_b) {
+ wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+ pt->filters.filter[range].msr_b = filter->msr_b;
+ }
+
+ rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+ }
+
+ return rtit_ctl;
+}
+
static void pt_config(struct perf_event *event)
{
u64 reg;
@@ -262,7 +329,8 @@ static void pt_config(struct perf_event *event)
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
}
- reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+ reg = pt_config_filters(event);
+ reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
if (!event->attr.exclude_kernel)
reg |= RTIT_CTL_OS;
@@ -907,6 +975,105 @@ static void pt_buffer_free_aux(void *data)
kfree(buf);
}
+static int pt_addr_filters_init(struct perf_event *event)
+{
+ struct pt_filters *filters;
+ int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+ if (!pt_cap_get(PT_CAP_num_address_ranges))
+ return 0;
+
+ filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
+ if (!filters)
+ return -ENOMEM;
+
+ if (event->parent)
+ memcpy(filters, event->parent->hw.addr_filters,
+ sizeof(*filters));
+
+ event->hw.addr_filters = filters;
+ event->hw.addr_filters_offs = filters->offs;
+
+ return 0;
+}
+
+static void pt_addr_filters_fini(struct perf_event *event)
+{
+ kfree(event->hw.addr_filters);
+ event->hw.addr_filters = NULL;
+ event->hw.addr_filters_offs = NULL;
+}
+
+static int pt_event_addr_filters_validate(struct list_head *filters)
+{
+ struct perf_addr_filter *filter;
+ int range = 0;
+
+ list_for_each_entry(filter, filters, entry) {
+ /* PT doesn't support single address triggers */
+ if (!filter->range)
+ return -EOPNOTSUPP;
+
+ if (filter->kernel && !kernel_ip(filter->offset))
+ return -EINVAL;
+
+ if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void pt_event_start(struct perf_event *event, int mode);
+static void pt_event_stop(struct perf_event *event, int mode);
+
+static void pt_event_addr_filters_setup(struct perf_event *event,
+ unsigned long *offs, int flags)
+{
+ struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+ struct pt_filters *filters = event->hw.addr_filters;
+ unsigned int reload = flags & PERF_EF_RELOAD;
+ struct perf_addr_filter *filter;
+ unsigned long msr_a, msr_b;
+ int range = 0;
+
+ if (!filters ||
+ WARN_ON_ONCE(pt_event_addr_filters_validate(&head->list)))
+ return;
+
+ if (reload)
+ pt_event_stop(event, PERF_EF_UPDATE);
+
+ list_for_each_entry(filter, &head->list, entry) {
+ /* kernel-space filters are already configured */
+ if (filter->kernel) {
+ msr_a = filter->offset;
+ msr_b = filter->offset + filter->size;
+ } else if (!offs) {
+ /* clear the range */
+ msr_a = msr_b = 0;
+ } else if (offs[range] == 0) {
+ /* keep the existing range */
+ goto next;
+ } else {
+ /* apply the offset */
+ msr_a = filter->offset + offs[range];
+ msr_b = filter->size + msr_a;
+ }
+
+ filters->filter[range].msr_a = msr_a;
+ filters->filter[range].msr_b = msr_b;
+next:
+ filters->filter[range].config = filter->filter ? 1 : 2;
+ range++;
+ }
+
+ filters->nr_filters = range;
+
+ if (reload)
+ pt_event_start(event, PERF_EF_RELOAD);
+}
+
/**
* intel_pt_interrupt() - PT PMI handler
*/
@@ -1075,6 +1242,7 @@ static void pt_event_read(struct perf_event *event)
static void pt_event_destroy(struct perf_event *event)
{
+ pt_addr_filters_fini(event);
x86_del_exclusive(x86_lbr_exclusive_pt);
}
@@ -1089,6 +1257,11 @@ static int pt_event_init(struct perf_event *event)
if (x86_add_exclusive(x86_lbr_exclusive_pt))
return -EBUSY;
+ if (pt_addr_filters_init(event)) {
+ x86_del_exclusive(x86_lbr_exclusive_pt);
+ return -ENOMEM;
+ }
+
event->destroy = pt_event_destroy;
return 0;
@@ -1152,6 +1325,14 @@ static __init int pt_init(void)
pt_pmu.pmu.read = pt_event_read;
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
+
+ if (pt_cap_get(PT_CAP_num_address_ranges)) {
+ pt_pmu.pmu.addr_filters_validate =
+ pt_event_addr_filters_validate;
+ pt_pmu.pmu.addr_filters_setup =
+ pt_event_addr_filters_setup;
+ }
+
ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
return ret;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 6ce8cd20b9..27bd3838c5 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -105,13 +105,41 @@ struct pt_buffer {
struct topa_entry *topa_index[0];
};
+#define PT_FILTERS_NUM 4
+
+/**
+ * struct pt_filter - IP range filter configuration
+ * @msr_a: range start, goes to RTIT_ADDRn_A
+ * @msr_b: range end, goes to RTIT_ADDRn_B
+ * @config: 4-bit field in RTIT_CTL
+ */
+struct pt_filter {
+ unsigned long msr_a;
+ unsigned long msr_b;
+ unsigned long config;
+};
+
+/**
+ * struct pt_filters - IP range filtering context
+ * @filter: filters defined for this context
+ * @offs: array of mapped object's offsets
+ * @nr_filters: number of defined filters in the @filter array
+ */
+struct pt_filters {
+ struct pt_filter filter[PT_FILTERS_NUM];
+ unsigned long offs[PT_FILTERS_NUM];
+ unsigned int nr_filters;
+};
+
/**
* struct pt - per-cpu pt context
* @handle: perf output handle
+ * @filters: last configured filters
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
*/
struct pt {
struct perf_output_handle handle;
+ struct pt_filters filters;
int handle_nmi;
};
--
2.8.0.rc3