[patch V6 02/37] tracing/hwlat: Split ftrace_nmi_enter/exit()

From: Thomas Gleixner
Date: Fri May 15 2020 - 20:10:46 EST


The hardware latency tracer calls into timekeeping and ends up in
various instrumentable functions, which is problematic vs. the kprobe
handling and especially the text poke machinery, because it is invoked
from nmi_enter/exit(), i.e. non-instrumentable code.

Split it into two parts:

1) NMI counter, only invoked on nmi_enter() and noinstr safe

2) NMI timestamping, to be invoked from instrumentable code

Move it into the "RCU is watching" regions of nmi_enter/exit(). There
is no actual RCU dependency right now, but there is also no point in
invoking it earlier than that.

The actual split of nmi_enter/exit() is done in a separate step.

Requested-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
include/linux/ftrace_irq.h | 31 +++++++++++++++++++------------
include/linux/hardirq.h | 5 +++--
kernel/trace/trace_hwlat.c | 19 ++++++++++++-------
3 files changed, 34 insertions(+), 21 deletions(-)

--- a/include/linux/ftrace_irq.h
+++ b/include/linux/ftrace_irq.h
@@ -4,23 +4,30 @@

#ifdef CONFIG_HWLAT_TRACER
extern bool trace_hwlat_callback_enabled;
-extern void trace_hwlat_callback(bool enter);
-#endif
+extern void trace_hwlat_count_nmi(void);
+extern void trace_hwlat_timestamp(bool enter);

-static inline void ftrace_nmi_enter(void)
+static __always_inline void ftrace_count_nmi(void)
{
-#ifdef CONFIG_HWLAT_TRACER
- if (trace_hwlat_callback_enabled)
- trace_hwlat_callback(true);
-#endif
+ if (unlikely(trace_hwlat_callback_enabled))
+ trace_hwlat_count_nmi();
}

-static inline void ftrace_nmi_exit(void)
+static __always_inline void ftrace_nmi_handler_enter(void)
{
-#ifdef CONFIG_HWLAT_TRACER
- if (trace_hwlat_callback_enabled)
- trace_hwlat_callback(false);
-#endif
+ if (unlikely(trace_hwlat_callback_enabled))
+ trace_hwlat_timestamp(true);
}

+static __always_inline void ftrace_nmi_handler_exit(void)
+{
+ if (unlikely(trace_hwlat_callback_enabled))
+ trace_hwlat_timestamp(false);
+}
+#else /* CONFIG_HWLAT_TRACER */
+static inline void ftrace_count_nmi(void) {}
+static inline void ftrace_nmi_handler_enter(void) {}
+static inline void ftrace_nmi_handler_exit(void) {}
+#endif
+
#endif /* _LINUX_FTRACE_IRQ_H */
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -82,20 +82,21 @@ extern void irq_exit(void);
arch_nmi_enter(); \
printk_nmi_enter(); \
lockdep_off(); \
- ftrace_nmi_enter(); \
BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
rcu_nmi_enter(); \
lockdep_hardirq_enter(); \
+ ftrace_count_nmi(); \
+ ftrace_nmi_handler_enter(); \
} while (0)

#define nmi_exit() \
do { \
+ ftrace_nmi_handler_exit(); \
lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
BUG_ON(!in_nmi()); \
__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
- ftrace_nmi_exit(); \
lockdep_on(); \
printk_nmi_exit(); \
arch_nmi_exit(); \
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -132,21 +132,26 @@ static void trace_hwlat_sample(struct hw
}

/*
+ * Count NMIs in nmi_enter(). Does not take timestamps
+ * because the timestamping callchain cannot be invoked
+ * from noinstr sections.
+ */
+noinstr void trace_hwlat_count_nmi(void)
+{
+ if (smp_processor_id() == nmi_cpu)
+ nmi_count++;
+}
+
+/*
* Timestamping uses ktime_get_mono_fast(), the NMI safe access to
* CLOCK_MONOTONIC.
*/
-void trace_hwlat_callback(bool enter)
+void trace_hwlat_timestamp(bool enter)
{
- if (smp_processor_id() != nmi_cpu)
- return;
-
if (enter)
nmi_ts_start = ktime_get_mono_fast_ns();
else
nmi_total_ts += ktime_get_mono_fast_ns() - nmi_ts_start;
-
- if (enter)
- nmi_count++;
}

/**