Re: [PATCH v2] tracing: Remove definition of trace_*_rcuidle()
From: Steven Rostedt
Date: Tue Dec 03 2024 - 22:02:01 EST
On Tue, 3 Dec 2024 17:48:33 -0800
Guenter Roeck <linux@xxxxxxxxxxxx> wrote:
> Hmm. If you say so. Note that powerpc has the same or a similar problem.
>
> [ 0.142039][ T0] RCU not watching for tracepoint
> [ 0.142488][ T0]
> [ 0.142659][ T0] =============================
> [ 0.142755][ T0] WARNING: suspicious RCU usage
> [ 0.142914][ T0] 6.13.0-rc1-00058-ge75ce84aa5d3 #1 Not tainted
> [ 0.143082][ T0] -----------------------------
> [ 0.143178][ T0] kernel/notifier.c:586 notify_die called but RCU thinks we're quiescent!
>
>
> [ 0.152733][ T0] RCU not watching for tracepoint
> [ 0.152770][ T0]
> [ 0.152995][ T0] =============================
> [ 0.153092][ T0] WARNING: suspicious RCU usage
> [ 0.153187][ T0] 6.13.0-rc1-00058-ge75ce84aa5d3 #1 Not tainted
> [ 0.153301][ T0] -----------------------------
> [ 0.153394][ T0] include/linux/rcupdate.h:850 rcu_read_lock() used illegally while idle!
>
> [ 0.165396][ T0] RCU not watching for tracepoint
> [ 0.165540][ T0]
> [ 0.165712][ T0] =============================
> [ 0.165811][ T0] WARNING: suspicious RCU usage
> [ 0.165909][ T0] 6.13.0-rc1-00058-ge75ce84aa5d3 #1 Not tainted
> [ 0.166026][ T0] -----------------------------
> [ 0.166122][ T0] include/linux/rcupdate.h:878 rcu_read_unlock() used illegally while idle!
>
> and many more.
Grumble. It's just that one file. I wonder if we could just do a hack like
this?
Paul?
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index 5c03633316a6..58098873efa9 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -10,11 +10,42 @@
#include <linux/module.h>
#include <linux/ftrace.h>
#include <linux/kprobes.h>
+#include <linux/hardirq.h>
#include "trace.h"
#define CREATE_TRACE_POINTS
#include <trace/events/preemptirq.h>
+/*
+ * Use regular tracepoints on architectures that implement noinstr
+ * tooling: there, these calls only happen while RCU is watching, so
+ * a regular tracepoint can be used.
+ *
+ * On older architectures, RCU may not be watching in idle. In that
+ * case, wake up RCU to watch while calling the tracepoint. These
+ * aren't NMI-safe - so exclude NMI contexts:
+ */
+#ifdef CONFIG_ARCH_WANTS_NO_INSTR
+#define trace(point, args) trace_##point(args)
+#else
+#define trace(point, args) \
+ do { \
+ if (trace_##point##_enabled()) { \
+ bool exit_rcu = false; \
+ if (in_nmi()) \
+ break; \
+ if (!IS_ENABLED(CONFIG_TINY_RCU) && \
+ is_idle_task(current)) { \
+ ct_irq_enter(); \
+ exit_rcu = true; \
+ } \
+ trace_##point(args); \
+ if (exit_rcu) \
+ ct_irq_exit(); \
+ } \
+ } while (0)
+#endif
+
#ifdef CONFIG_TRACE_IRQFLAGS
/* Per-cpu variable to prevent redundant calls when IRQs already off */
static DEFINE_PER_CPU(int, tracing_irq_cpu);
@@ -28,7 +59,7 @@ static DEFINE_PER_CPU(int, tracing_irq_cpu);
void trace_hardirqs_on_prepare(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
- trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
+ trace(irq_enable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
this_cpu_write(tracing_irq_cpu, 0);
}
@@ -39,7 +70,7 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_prepare);
void trace_hardirqs_on(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
- trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
+ trace(irq_enable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
this_cpu_write(tracing_irq_cpu, 0);
}
@@ -61,7 +92,7 @@ void trace_hardirqs_off_finish(void)
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
- trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
+ trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
}
}
@@ -75,7 +106,7 @@ void trace_hardirqs_off(void)
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
- trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
+ trace(irq_disable, TP_ARGS(CALLER_ADDR0, CALLER_ADDR1));
}
}
EXPORT_SYMBOL(trace_hardirqs_off);
@@ -86,13 +117,13 @@ NOKPROBE_SYMBOL(trace_hardirqs_off);
void trace_preempt_on(unsigned long a0, unsigned long a1)
{
- trace_preempt_enable(a0, a1);
+ trace(preempt_enable, TP_ARGS(a0, a1));
tracer_preempt_on(a0, a1);
}
void trace_preempt_off(unsigned long a0, unsigned long a1)
{
- trace_preempt_disable(a0, a1);
+ trace(preempt_disable, TP_ARGS(a0, a1));
tracer_preempt_off(a0, a1);
}
#endif
I tested this by forcing x86 to use this code, and it appeared to work.
-- Steve