[patch V4 part 1 22/36] tracing: Provide lockdep less trace_hardirqs_on/off() variants

From: Thomas Gleixner
Date: Tue May 05 2020 - 10:23:10 EST


trace_hardirqs_on/off() are only partially safe vs. RCU idle. The tracer
core itself is safe, but the resulting tracepoints can be utilized by
e.g. BPF, which is unsafe.

Provide variants which do not contain the lockdep invocation so the lockdep
and tracer invocations can be split at the call site and placed properly.

The new variants also do not use rcuidle as they are going to be called
from entry code after/before context tracking.

Name them so they match the lockdep counterparts.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
V3: Renamed to trace_hardirqs_on/off_prepare().
V2: New patch
---
include/linux/irqflags.h | 4 ++++
kernel/trace/trace_preemptirq.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 41 insertions(+)

--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -29,6 +29,8 @@
#endif

#ifdef CONFIG_TRACE_IRQFLAGS
+ extern void trace_hardirqs_on_prepare(void);
+ extern void trace_hardirqs_off_prepare(void);
extern void trace_hardirqs_on(void);
extern void trace_hardirqs_off(void);
# define lockdep_hardirq_context(p) ((p)->hardirq_context)
@@ -96,6 +98,8 @@ do { \
} while (0)

#else
+# define trace_hardirqs_on_prepare() do { } while (0)
+# define trace_hardirqs_off_prepare() do { } while (0)
# define trace_hardirqs_on() do { } while (0)
# define trace_hardirqs_off() do { } while (0)
# define lockdep_hardirq_context(p) 0
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -19,6 +19,24 @@
/* Per-cpu variable to prevent redundant calls when IRQs already off */
static DEFINE_PER_CPU(int, tracing_irq_cpu);

+/*
+ * Like trace_hardirqs_on() but without the lockdep invocation. This is
+ * used in the low level entry code where the ordering vs. RCU is important
+ * and lockdep uses a staged approach which splits the lockdep hardirq
+ * tracking into an RCU on and an RCU off section.
+ */
+void trace_hardirqs_on_prepare(void)
+{
+ if (this_cpu_read(tracing_irq_cpu)) { /* act only if this CPU is marked as IRQs-off */
+ if (!in_nmi()) /* trace_irq_enable() is skipped in NMI context */
+ trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
+ tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1);
+ this_cpu_write(tracing_irq_cpu, 0); /* clear the mark; repeated calls now skip the body */
+ }
+}
+EXPORT_SYMBOL(trace_hardirqs_on_prepare);
+NOKPROBE_SYMBOL(trace_hardirqs_on_prepare);
+
void trace_hardirqs_on(void)
{
if (this_cpu_read(tracing_irq_cpu)) {
@@ -33,6 +51,25 @@ void trace_hardirqs_on(void)
EXPORT_SYMBOL(trace_hardirqs_on);
NOKPROBE_SYMBOL(trace_hardirqs_on);

+/*
+ * Like trace_hardirqs_off() but without the lockdep invocation. This is
+ * used in the low level entry code where the ordering vs. RCU is important
+ * and lockdep uses a staged approach which splits the lockdep hardirq
+ * tracking into an RCU on and an RCU off section.
+ */
+void trace_hardirqs_off_prepare(void)
+{
+ if (!this_cpu_read(tracing_irq_cpu)) { /* skip if this CPU is already marked as IRQs-off */
+ this_cpu_write(tracing_irq_cpu, 1); /* set before tracing; repeated calls now skip the body */
+ tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1);
+ if (!in_nmi()) /* trace_irq_disable() is skipped in NMI context */
+ trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
+ }
+
+}
+EXPORT_SYMBOL(trace_hardirqs_off_prepare);
+NOKPROBE_SYMBOL(trace_hardirqs_off_prepare);
+
void trace_hardirqs_off(void)
{
if (!this_cpu_read(tracing_irq_cpu)) {