[PATCH v2 02/11] perf/x86: Improve HT workaround GP counter constraint

From: Peter Zijlstra
Date: Fri May 22 2015 - 09:33:38 EST


The (SNB/IVB/HSW) HT bug only affects events that can be programmed
onto GP counters, therefore we should only limit the number of GP
counters that can be used per cpu -- iow we should not constrain the
FP counters.

Furthermore, we should only enfore such a limit when there are in fact
exclusive events being scheduled on either sibling.

Reported-by: Vince Weaver <vincent.weaver@xxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/kernel/cpu/perf_event.c | 36 +++++++++++++++++++++-----
arch/x86/kernel/cpu/perf_event.h | 11 +++++--
arch/x86/kernel/cpu/perf_event_intel.c | 30 +++++++--------------
arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 -
4 files changed, 49 insertions(+), 30 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -611,6 +611,7 @@ struct sched_state {
int event; /* event index */
int counter; /* counter index */
int unassigned; /* number of events to be assigned left */
+ int nr_gp; /* number of GP counters used */
unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
};

@@ -620,9 +621,10 @@ struct sched_state {
struct perf_sched {
int max_weight;
int max_events;
+ int max_gp;
+ int saved_states;
struct event_constraint **constraints;
struct sched_state state;
- int saved_states;
struct sched_state saved[SCHED_STATES_MAX];
};

@@ -630,13 +632,14 @@ struct perf_sched {
* Initialize interator that runs through all events and counters.
*/
static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
- int num, int wmin, int wmax)
+ int num, int wmin, int wmax, int gpmax)
{
int idx;

memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
+ sched->max_gp = gpmax;
sched->constraints = constraints;

for (idx = 0; idx < num; idx++) {
@@ -696,11 +699,16 @@ static bool __perf_sched_find_counter(st
goto done;
}
}
+
/* Grab the first unused counter starting with idx */
idx = sched->state.counter;
for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
- if (!__test_and_set_bit(idx, sched->state.used))
+ if (!__test_and_set_bit(idx, sched->state.used)) {
+ if (sched->state.nr_gp++ >= sched->max_gp)
+ return false;
+
goto done;
+ }
}

return false;
@@ -757,11 +765,11 @@ static bool perf_sched_next_event(struct
* Assign a counter for each event.
*/
int perf_assign_events(struct event_constraint **constraints, int n,
- int wmin, int wmax, int *assign)
+ int wmin, int wmax, int gpmax, int *assign)
{
struct perf_sched sched;

- perf_sched_init(&sched, constraints, n, wmin, wmax);
+ perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);

do {
if (!perf_sched_find_counter(&sched))
@@ -822,8 +830,24 @@ int x86_schedule_events(struct cpu_hw_ev

/* slow path */
if (i != n) {
+ int gpmax = x86_pmu.num_counters;
+
+ /*
+ * Do not allow scheduling of more than half the available
+ * generic counters.
+ *
+ * This helps avoid counter starvation of sibling thread by
+ * ensuring at most half the counters cannot be in exclusive
+ * mode. There is no designated counters for the limits. Any
+ * N/2 counters can be used. This helps with events with
+ * specific counter constraints.
+ */
+ if (is_ht_workaround_enabled() && !cpuc->is_fake &&
+ READ_ONCE(cpuc->excl_cntrs->exclusive_present))
+ gpmax /= 2;
+
unsched = perf_assign_events(cpuc->event_constraint, n, wmin,
- wmax, assign);
+ wmax, gpmax, assign);
}

/*
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -74,6 +74,7 @@ struct event_constraint {
#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */


struct amd_nb {
@@ -134,8 +135,6 @@ enum intel_excl_state_type {
struct intel_excl_states {
enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
enum intel_excl_state_type state[X86_PMC_IDX_MAX];
- int num_alloc_cntrs;/* #counters allocated */
- int max_alloc_cntrs;/* max #counters allowed */
bool sched_started; /* true if scheduling has started */
};

@@ -144,6 +143,11 @@ struct intel_excl_cntrs {

struct intel_excl_states states[2];

+ union {
+ u16 has_exclusive[2];
+ u32 exclusive_present;
+ };
+
int refcnt; /* per-core: #HT threads */
unsigned core_id; /* per-core: core id */
};
@@ -176,6 +180,7 @@ struct cpu_hw_events {
struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
struct event_constraint *event_constraint[X86_PMC_IDX_MAX];

+ int n_excl; /* the number of exclusive events */

unsigned int group_flag;
int is_fake;
@@ -719,7 +724,7 @@ static inline void __x86_pmu_enable_even
void x86_pmu_enable_all(int added);

int perf_assign_events(struct event_constraint **constraints, int n,
- int wmin, int wmax, int *assign);
+ int wmin, int wmax, int gpmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);

void x86_pmu_stop(struct perf_event *event, int flags);
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1923,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_eve
xl = &excl_cntrs->states[tid];

xl->sched_started = true;
- xl->num_alloc_cntrs = 0;
/*
* lock shared state until we are done scheduling
* in stop_event_scheduling()
@@ -2000,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw
* across HT threads
*/
is_excl = c->flags & PERF_X86_EVENT_EXCL;
+ if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+ event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+ if (!cpuc->n_excl++)
+ WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+ }

/*
* xl = state of current HT
@@ -2008,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw
xl = &excl_cntrs->states[tid];
xlo = &excl_cntrs->states[o_tid];

- /*
- * do not allow scheduling of more than max_alloc_cntrs
- * which is set to half the available generic counters.
- * this helps avoid counter starvation of sibling thread
- * by ensuring at most half the counters cannot be in
- * exclusive mode. There is not designated counters for the
- * limits. Any N/2 counters can be used. This helps with
- * events with specifix counter constraints
- */
- if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs)
- return &emptyconstraint;
-
cx = c;

/*
@@ -2150,6 +2142,11 @@ static void intel_put_excl_constraints(s

xl = &excl_cntrs->states[tid];
xlo = &excl_cntrs->states[o_tid];
+ if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
+ hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
+ if (!--cpuc->n_excl)
+ WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
+ }

/*
* put_constraint may be called from x86_schedule_events()
@@ -2632,8 +2629,6 @@ static void intel_pmu_cpu_starting(int c
cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];

if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
- int h = x86_pmu.num_counters >> 1;
-
for_each_cpu(i, topology_thread_cpumask(cpu)) {
struct intel_excl_cntrs *c;

@@ -2647,11 +2642,6 @@ static void intel_pmu_cpu_starting(int c
}
cpuc->excl_cntrs->core_id = core_id;
cpuc->excl_cntrs->refcnt++;
- /*
- * set hard limit to half the number of generic counters
- */
- cpuc->excl_cntrs->states[0].max_alloc_cntrs = h;
- cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;
}
}

--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -394,7 +394,7 @@ static int uncore_assign_events(struct i
/* slow path */
if (i != n)
ret = perf_assign_events(box->event_constraint, n,
- wmin, wmax, assign);
+ wmin, wmax, n, assign);

if (!assign || ret) {
for (i = 0; i < n; i++)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/