[PATCH 02/10] perf_events: Add fast-path to the rescheduling code

From: Peter Zijlstra
Date: Fri Jan 22 2010 - 10:59:40 EST


The 6th version does the following:
- implement correct fast-path scheduling, i.e., reuse the previous counter
  assignment when it is still valid
- skip reprogramming counters in hw_perf_enable() by means of a per-counter
  generation number (see the sketch below)
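For illustration only, here is a minimal user-space sketch of the two ideas,
with made-up names (NR_COUNTERS, struct counter_state, try_fast_path, ...);
the kernel code in the diff below is the authoritative version.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_COUNTERS 4

struct counter_state {
	uint64_t tags[NR_COUNTERS];	/* bumped on every fresh assignment */
};

struct event {
	unsigned long constraint;	/* bitmask of counters this event may use */
	int idx;			/* counter used last time, -1 if none */
	int last_cpu;
	uint64_t last_tag;
};

/* Fast path: give @ev the counter it had before, if still legal and free. */
static bool try_fast_path(struct event *ev, unsigned long *used_mask)
{
	if (ev->idx == -1)				/* never scheduled */
		return false;
	if (!(ev->constraint & (1UL << ev->idx)))	/* constraint not honored */
		return false;
	if (*used_mask & (1UL << ev->idx))		/* counter already taken */
		return false;

	*used_mask |= 1UL << ev->idx;
	return true;
}

/* Record which counter @ev got and bump that counter's generation tag. */
static void assign(struct event *ev, struct counter_state *cs, int cpu, int idx)
{
	ev->idx = idx;
	ev->last_cpu = cpu;
	ev->last_tag = ++cs->tags[idx];
}

/* True if @ev still owns counter @idx on @cpu: reprogramming can be skipped. */
static bool match_prev_assignment(struct event *ev, struct counter_state *cs,
				  int cpu, int idx)
{
	return ev->idx == idx && ev->last_cpu == cpu &&
	       ev->last_tag == cs->tags[idx];
}

int main(void)
{
	struct counter_state cs = { .tags = { 0 } };
	struct event ev = { .constraint = 0xf, .idx = -1, .last_cpu = -1 };
	unsigned long used_mask = 0;
	int cpu = 0;

	/* First schedule: fast path fails, fall back to a full assignment. */
	if (!try_fast_path(&ev, &used_mask)) {
		assign(&ev, &cs, cpu, 2);
		used_mask |= 1UL << 2;
	}

	/* Second schedule: the previous counter is free again, reuse it. */
	used_mask = 0;
	printf("fast path reused counter: %s\n",
	       try_fast_path(&ev, &used_mask) ? "yes" : "no");
	printf("skip reprogramming: %s\n",
	       match_prev_assignment(&ev, &cs, cpu, 2) ? "yes" : "no");
	return 0;
}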

Signed-off-by: Stephane Eranian <eranian@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
LKML-Reference: <4b588464.1818d00a.4456.383b@xxxxxxxxxxxxx>
---
 arch/x86/kernel/cpu/perf_event.c |  148 +++++++++++++++++++++++----------------
 include/linux/perf_event.h       |    2
 2 files changed, 93 insertions(+), 57 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -87,6 +87,7 @@ struct cpu_hw_events {
int n_events;
int n_added;
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+ u64 tags[X86_PMC_IDX_MAX];
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
};

@@ -1013,6 +1014,8 @@ static int __hw_perf_event_init(struct p
hwc->config = ARCH_PERFMON_EVENTSEL_INT;

hwc->idx = -1;
+ hwc->last_cpu = -1;
+ hwc->last_tag = ~0ULL;

/*
* Count user and OS events unless requested not to.
@@ -1245,6 +1248,46 @@ static int x86_schedule_events(struct cp
}

/*
+ * fastpath, try to reuse previous register
+ */
+ for (i = 0, num = n; i < n; i++, num--) {
+ hwc = &cpuc->event_list[i]->hw;
+ c = (unsigned long *)constraints[i];
+
+ /* never assigned */
+ if (hwc->idx == -1)
+ break;
+
+ /* constraint still honored */
+ if (!test_bit(hwc->idx, c))
+ break;
+
+ /* not already used */
+ if (test_bit(hwc->idx, used_mask))
+ break;
+
+#if 0
+ pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n",
+ smp_processor_id(),
+ hwc->config,
+ hwc->idx,
+ assign ? 'y' : 'n');
+#endif
+
+ set_bit(hwc->idx, used_mask);
+ if (assign)
+ assign[i] = hwc->idx;
+ }
+ if (!num)
+ goto done;
+
+ /*
+ * begin slow path
+ */
+
+ bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+
+ /*
* weight = number of possible counters
*
* 1 = most constrained, only works on one counter
@@ -1263,10 +1306,9 @@ static int x86_schedule_events(struct cp
if (x86_pmu.num_events_fixed)
wmax++;

- num = n;
- for (w = 1; num && w <= wmax; w++) {
+ for (w = 1, num = n; num && w <= wmax; w++) {
/* for each event */
- for (i = 0; i < n; i++) {
+ for (i = 0; num && i < n; i++) {
c = (unsigned long *)constraints[i];
hwc = &cpuc->event_list[i]->hw;

@@ -1274,28 +1316,6 @@ static int x86_schedule_events(struct cp
if (weight != w)
continue;

- /*
- * try to reuse previous assignment
- *
- * This is possible despite the fact that
- * events or events order may have changed.
- *
- * What matters is the level of constraints
- * of an event and this is constant for now.
- *
- * This is possible also because we always
- * scan from most to least constrained. Thus,
- * if a counter can be reused, it means no,
- * more constrained events, needed it. And
- * next events will either compete for it
- * (which cannot be solved anyway) or they
- * have fewer constraints, and they can use
- * another counter.
- */
- j = hwc->idx;
- if (j != -1 && !test_bit(j, used_mask))
- goto skip;
-
for_each_bit(j, c, X86_PMC_IDX_MAX) {
if (!test_bit(j, used_mask))
break;
@@ -1303,22 +1323,23 @@ static int x86_schedule_events(struct cp

if (j == X86_PMC_IDX_MAX)
break;
-skip:
- set_bit(j, used_mask);

#if 0
- pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n",
+ pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n",
smp_processor_id(),
hwc->config,
j,
assign ? 'y' : 'n');
#endif

+ set_bit(j, used_mask);
+
if (assign)
assign[i] = j;
num--;
}
}
+done:
/*
* scheduling failed or is just a simulation,
* free resources if necessary
@@ -1357,7 +1378,7 @@ static int collect_events(struct cpu_hw_

list_for_each_entry(event, &leader->sibling_list, group_entry) {
if (!is_x86_event(event) ||
- event->state == PERF_EVENT_STATE_OFF)
+ event->state <= PERF_EVENT_STATE_OFF)
continue;

if (n >= max_count)
@@ -1369,11 +1390,15 @@ static int collect_events(struct cpu_hw_
return n;
}

-
static inline void x86_assign_hw_event(struct perf_event *event,
- struct hw_perf_event *hwc, int idx)
+ struct cpu_hw_events *cpuc,
+ int idx)
{
+ struct hw_perf_event *hwc = &event->hw;
+
hwc->idx = idx;
+ hwc->last_cpu = smp_processor_id();
+ hwc->last_tag = ++cpuc->tags[idx];

if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
hwc->config_base = 0;
@@ -1392,6 +1417,14 @@ static inline void x86_assign_hw_event(s
}
}

+static bool match_prev_assignment(struct hw_perf_event *hwc,
+ struct cpu_hw_events *cpuc, int idx)
+{
+ return hwc->idx == cpuc->assign[idx] &&
+ hwc->last_cpu == smp_processor_id() &&
+ hwc->last_tag == cpuc->tags[idx];
+}
+
void hw_perf_enable(void)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1401,45 +1434,33 @@ void hw_perf_enable(void)

if (!x86_pmu_initialized())
return;
+
if (cpuc->n_added) {
/*
* apply assignment obtained either from
* hw_perf_group_sched_in() or x86_pmu_enable()
*
- * step1: save events moving to new counters
- * step2: reprogram moved events into new counters
+ * We either re-enable or re-program and re-enable.
+ * All events are disabled by the time we come here.
+ * That means their state has been saved already.
*/
for (i = 0; i < cpuc->n_events; i++) {
-
event = cpuc->event_list[i];
hwc = &event->hw;

- if (hwc->idx == -1 || hwc->idx == cpuc->assign[i])
- continue;
-
- x86_pmu.disable(hwc, hwc->idx);
-
- clear_bit(hwc->idx, cpuc->active_mask);
- barrier();
- cpuc->events[hwc->idx] = NULL;
-
- x86_perf_event_update(event, hwc, hwc->idx);
-
- hwc->idx = -1;
- }
-
- for (i = 0; i < cpuc->n_events; i++) {
-
- event = cpuc->event_list[i];
- hwc = &event->hw;
-
- if (hwc->idx == -1) {
- x86_assign_hw_event(event, hwc, cpuc->assign[i]);
+ /*
+ * we can avoid reprogramming counter if:
+ * - assigned same counter as last time
+ * - running on same CPU as last time
+ * - no other event has used the counter since
+ */
+ if (!match_prev_assignment(hwc, cpuc, i)) {
+ x86_assign_hw_event(event, cpuc, cpuc->assign[i]);
x86_perf_event_set_period(event, hwc, hwc->idx);
}
/*
* need to mark as active because x86_pmu_disable()
- * clear active_mask and eventsp[] yet it preserves
+ * clear active_mask and events[] yet it preserves
* idx
*/
set_bit(hwc->idx, cpuc->active_mask);
@@ -2191,6 +2212,8 @@ static void amd_get_event_constraints(st
struct perf_event *event,
u64 *idxmsk)
{
+ /* no constraints, means supports all generic counters */
+ bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events);
}

static int x86_event_sched_in(struct perf_event *event,
@@ -2265,7 +2288,7 @@ int hw_perf_group_sched_in(struct perf_e

n1 = 1;
list_for_each_entry(sub, &leader->sibling_list, group_entry) {
- if (sub->state != PERF_EVENT_STATE_OFF) {
+ if (sub->state > PERF_EVENT_STATE_OFF) {
ret = x86_event_sched_in(sub, cpuctx, cpu);
if (ret)
goto undo;
@@ -2620,12 +2643,23 @@ static int validate_group(struct perf_ev

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
+ const struct pmu *tmp;
int err;

err = __hw_perf_event_init(event);
if (!err) {
+ /*
+ * we temporarily connect event to its pmu
+ * such that validate_group() can classify
+ * it as an x86 event using is_x86_event()
+ */
+ tmp = event->pmu;
+ event->pmu = &pmu;
+
if (event->group_leader != event)
err = validate_group(event);
+
+ event->pmu = tmp;
}
if (err) {
if (event->destroy)
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -478,9 +478,11 @@ struct hw_perf_event {
union {
struct { /* hardware */
u64 config;
+ u64 last_tag;
unsigned long config_base;
unsigned long event_base;
int idx;
+ int last_cpu;
};
struct { /* software */
s64 remaining;

--
