[tip: perf/core] perf: Move swevent_htable::recursion into task_struct.

From: tip-bot2 for Sebastian Andrzej Siewior
Date: Tue Jul 09 2024 - 07:43:14 EST


The following commit has been merged into the perf/core branch of tip:

Commit-ID: 0d40a6d83e3e6751f1107ba33587262d937c969f
Gitweb: https://git.kernel.org/tip/0d40a6d83e3e6751f1107ba33587262d937c969f
Author: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
AuthorDate: Thu, 04 Jul 2024 19:03:39 +02:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Tue, 09 Jul 2024 13:26:36 +02:00

perf: Move swevent_htable::recursion into task_struct.

The swevent_htable::recursion counter is used to avoid creating an
swevent while an event is processed to avoid recursion. The counter is
per-CPU and preemption must be disabled to have a stable counter.
perf_pending_task() disables preemption to access the counter and then
signal. This is problematic on PREEMPT_RT because sending a signal uses
a spinlock_t which must not be acquired in atomic on PREEMPT_RT because
it becomes a sleeping lock.

The atomic context can be avoided by moving the counter into the
task_struct. There is a 4 byte hole between futex_state (usually always
on) and the following perf pointer (perf_event_ctxp). After the
recursion lost some weight it fits perfectly.

Move swevent_htable::recursion into task_struct.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Tested-by: Marco Elver <elver@xxxxxxxxxx>
Link: https://lore.kernel.org/r/20240704170424.1466941-6-bigeasy@xxxxxxxxxxxxx
---
include/linux/perf_event.h | 6 ------
include/linux/sched.h | 7 +++++++
kernel/events/core.c | 13 +++----------
kernel/events/internal.h | 2 +-
4 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ea0d824..99a7ea1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -970,12 +970,6 @@ struct perf_event_context {
local_t nr_pending;
};

-/*
- * Number of contexts where an event can trigger:
- * task, softirq, hardirq, nmi.
- */
-#define PERF_NR_CONTEXTS 4
-
struct perf_cpu_pmu_context {
struct perf_event_pmu_context epc;
struct perf_event_pmu_context *task_epc;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 61591ac..afb1087 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,12 @@ enum perf_event_task_context {
perf_nr_task_contexts,
};

+/*
+ * Number of contexts where an event can trigger:
+ * task, softirq, hardirq, nmi.
+ */
+#define PERF_NR_CONTEXTS 4
+
struct wake_q_node {
struct wake_q_node *next;
};
@@ -1256,6 +1262,7 @@ struct task_struct {
unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
+ u8 perf_recursion[PERF_NR_CONTEXTS];
struct perf_event_context *perf_event_ctxp;
struct mutex perf_event_mutex;
struct list_head perf_event_list;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 53e2750..b523225 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9763,11 +9763,7 @@ struct swevent_htable {
struct swevent_hlist *swevent_hlist;
struct mutex hlist_mutex;
int hlist_refcount;
-
- /* Recursion avoidance in each contexts */
- u8 recursion[PERF_NR_CONTEXTS];
};
-
static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);

/*
@@ -9965,17 +9961,13 @@ DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);

int perf_swevent_get_recursion_context(void)
{
- struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
-
- return get_recursion_context(swhash->recursion);
+ return get_recursion_context(current->perf_recursion);
}
EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);

void perf_swevent_put_recursion_context(int rctx)
{
- struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
-
- put_recursion_context(swhash->recursion, rctx);
+ put_recursion_context(current->perf_recursion, rctx);
}

void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
@@ -13642,6 +13634,7 @@ int perf_event_init_task(struct task_struct *child, u64 clone_flags)
{
int ret;

+ memset(child->perf_recursion, 0, sizeof(child->perf_recursion));
child->perf_event_ctxp = NULL;
mutex_init(&child->perf_event_mutex);
INIT_LIST_HEAD(&child->perf_event_list);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 7f06b79..4515144 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -221,7 +221,7 @@ static inline int get_recursion_context(u8 *recursion)
return rctx;
}

-static inline void put_recursion_context(u8 *recursion, int rctx)
+static inline void put_recursion_context(u8 *recursion, unsigned char rctx)
{
barrier();
recursion[rctx]--;