[RESEND PATCH v2 3/3] perf/x86: reuse scarce pmu counters

From: Wen Yang
Date: Sun Mar 13 2022 - 13:22:42 EST


The NMI watchdog may permanently occupy one of the fixed counters
(*cycles*), so when another program also counts *cycles*, it is forced
onto a generic counter. This is a slight optimization: an event that is
non-sampling and whose constraint allows a fixed counter may share that
fixed counter, saving a generic counter for other events.
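
For example, a plain counting user of *cycles* sets no sample period,
so is_sampling_event() is false for it and, with this change, it can
share the fixed counter already taken by the NMI watchdog instead of
consuming a generic counter. A minimal user-space sketch of such a
non-sampling event (illustrative only, not part of this patch):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 0;		/* counting mode, not sampling */

	/* count cycles of the current task on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	/* ... run the workload to be measured ... */

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %lld\n", count);
	close(fd);
	return 0;
}

Sampling events (sample_period != 0) are unaffected and still get an
exclusive counter.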

Signed-off-by: Wen Yang <simon.wy@xxxxxxxxxxxxxxx>
Cc: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Stephane Eranian <eranian@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: Wen Yang <wenyang@xxxxxxxxxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: linux-perf-users@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
arch/x86/events/core.c | 35 ++++++++++++++++++++++++++---------
1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index b6ea220..95cfec6 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -799,6 +799,7 @@ struct perf_sched {
 	u64			msk_counters;
 	u64			msk_events;
 	struct event_constraint	**constraints;
+	struct perf_event	**events;
 	struct sched_state	state;
 	struct sched_state	saved[SCHED_STATES_MAX];
 };
@@ -846,7 +847,8 @@ static int perf_sched_calc_event(struct event_constraint **constraints,
 /*
  * Initialize iterator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
+static void perf_sched_init(struct perf_sched *sched,
+			    struct perf_event **events, struct event_constraint **constraints,
 			    int num, int wmin, int wmax, int gpmax, u64 mevt, u64 mcnt)
 {
 	memset(sched, 0, sizeof(*sched));
@@ -854,6 +856,7 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	sched->max_weight	= wmax;
 	sched->max_gp		= gpmax;
 	sched->constraints	= constraints;
+	sched->events		= events;
 	sched->msk_events	= mevt;
 	sched->msk_counters	= mcnt;
 
@@ -896,6 +899,7 @@ static bool perf_sched_restore_state(struct perf_sched *sched)
 static bool __perf_sched_find_counter(struct perf_sched *sched)
 {
 	struct event_constraint *c;
+	struct perf_event *e;
 	int idx;
 
 	if (!sched->state.unassigned)
@@ -905,16 +909,17 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 		return false;
 
 	c = sched->constraints[sched->state.event];
+	e = sched->events[sched->state.event];
 	/* Prefer fixed purpose counters */
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
 		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
 			u64 mask = BIT_ULL(idx);
 
-			if (sched->msk_counters & mask)
+			if ((sched->msk_counters & mask) && is_sampling_event(e))
 				continue;
 
-			if (sched->state.used & mask)
+			if ((sched->state.used & mask) && is_sampling_event(e))
 				continue;
 
 			sched->state.used |= mask;
@@ -1016,14 +1021,15 @@ static void perf_sched_obtain_used_registers(int *assign, int n, u64 *events, u6
 	}
 }
 
-static int __perf_assign_events(struct event_constraint **constraints, int n,
+static int __perf_assign_events(struct perf_event **events,
+				struct event_constraint **constraints, int n,
 				int wmin, int wmax, int gpmax, int *assign)
 {
 	u64 mevt, mcnt;
 	struct perf_sched sched;
 
 	perf_sched_obtain_used_registers(assign, n, &mevt, &mcnt);
-	perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax, mevt, mcnt);
+	perf_sched_init(&sched, events, constraints, n, wmin, wmax, gpmax, mevt, mcnt);
 
 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -1035,6 +1041,13 @@ static int __perf_assign_events(struct event_constraint **constraints, int n,
 	return sched.state.unassigned;
 }
 
+static bool is_pmc_reuseable(struct perf_event *e,
+			     struct event_constraint *c)
+{
+	return (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) &&
+	       (!is_sampling_event(e));
+}
+
 /*
  * Assign a counter for each event.
  */
@@ -1043,6 +1056,7 @@ int perf_assign_events(struct perf_event **event_list,
 			int wmin, int wmax, int gpmax, int *assign)
 {
 	struct event_constraint *c;
+	struct perf_event *e;
 	struct hw_perf_event *hwc;
 	u64 used_mask = 0;
 	int unsched = 0;
@@ -1058,6 +1072,7 @@ int perf_assign_events(struct perf_event **event_list,
 
 		hwc = &event_list[i]->hw;
 		c = constraints[i];
+		e = event_list[i];
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -1072,8 +1087,10 @@ int perf_assign_events(struct perf_event **event_list,
 			mask |= mask << 1;
 
 		/* not already used */
-		if (used_mask & mask)
-			break;
+		if (used_mask & mask) {
+			if (!is_pmc_reuseable(e, c))
+				break;
+		}
 
 		used_mask |= mask;
 
@@ -1083,12 +1100,12 @@ int perf_assign_events(struct perf_event **event_list,
 
 	/* slow path */
 	if (i != n) {
-		unsched = __perf_assign_events(constraints, n,
+		unsched = __perf_assign_events(event_list, constraints, n,
 				wmin, wmax, gpmax, assign);
 
 		if (unsched) {
 			memset(assign, -1, n * sizeof(int));
-			unsched = __perf_assign_events(constraints, n,
+			unsched = __perf_assign_events(event_list, constraints, n,
 				wmin, wmax, gpmax, assign);
 		}
 	}
--
1.8.3.1