[PATCH v6 1/4] ftrace: Implement fs notification for tracing_max_latency

From: Viktor Rosendahl
Date: Thu Sep 05 2019 - 09:25:58 EST


This patch implements the feature that the tracing_max_latency file,
e.g. /sys/kernel/debug/tracing/tracing_max_latency will receive
notifications through the fsnotify framework when a new latency is
available.

One particularly interesting use of this facility is when enabling
threshold tracing, through /sys/kernel/debug/tracing/tracing_thresh,
together with the preempt/irqsoff tracers. This makes it possible to
implement a user space program that can, with equal probability,
obtain traces of latencies that occur immediately after each other in
spite of the fact that the preempt/irqsoff tracers operate in overwrite
mode.

This facility works with the hwlat, preempt/irqsoff, and wakeup
tracers.

The tracers may call the latency_fsnotify() from places such as
__schedule() or do_idle(); this makes it impossible to call
queue_work() directly without risking a deadlock. The same would
happen with a softirq, kernel thread or tasklet. For this reason we
use the irq_work mechanism to call queue_work().

Signed-off-by: Viktor Rosendahl <viktor.rosendahl@xxxxxxxxx>
---
kernel/trace/trace.c | 75 +++++++++++++++++++++++++++++++++++++-
kernel/trace/trace.h | 18 +++++++++
kernel/trace/trace_hwlat.c | 4 +-
3 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 563e80f9006a..72ac20c4aaa1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -44,6 +44,9 @@
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
+#include <linux/fsnotify.h>
+#include <linux/irq_work.h>
+#include <linux/workqueue.h>

#include "trace.h"
#include "trace_output.h"
@@ -1480,6 +1483,74 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)

unsigned long __read_mostly tracing_thresh;

+#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
+ defined(CONFIG_FSNOTIFY)
+
+static const struct file_operations tracing_max_lat_fops;
+static struct workqueue_struct *fsnotify_wq;
+
+static void latency_fsnotify_workfn(struct work_struct *work)
+{
+ struct trace_array *tr = container_of(work, struct trace_array,
+ fsnotify_work);
+ fsnotify(tr->d_max_latency->d_inode, FS_MODIFY,
+ tr->d_max_latency->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+}
+
+static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
+{
+ struct trace_array *tr = container_of(iwork, struct trace_array,
+ fsnotify_irqwork);
+ queue_work(fsnotify_wq, &tr->fsnotify_work);
+}
+
+static void trace_create_maxlat_file(struct trace_array *tr,
+ struct dentry *d_tracer)
+{
+ INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
+ init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
+ tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
+ d_tracer, &tr->max_latency,
+ &tracing_max_lat_fops);
+}
+
+__init static int latency_fsnotify_init(void)
+{
+ fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
+ WQ_UNBOUND | WQ_HIGHPRI, 0);
+ if (!fsnotify_wq) {
+ pr_err("Unable to allocate tr_max_lat_wq\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+late_initcall_sync(latency_fsnotify_init);
+
+void latency_fsnotify(struct trace_array *tr)
+{
+ if (!fsnotify_wq)
+ return;
+ /*
+ * We cannot call queue_work(&tr->fsnotify_work) from here because it's
+ * possible that we are called from __schedule() or do_idle(), which
+ * could cause a deadlock.
+ */
+ irq_work_queue(&tr->fsnotify_irqwork);
+}
+
+/*
+ * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
+ * defined(CONFIG_FSNOTIFY)
+ */
+#else
+
+#define trace_create_maxlat_file(tr, d_tracer) \
+ trace_create_file("tracing_max_latency", 0644, d_tracer, \
+ &tr->max_latency, &tracing_max_lat_fops)
+
+#endif
+
#ifdef CONFIG_TRACER_MAX_TRACE
/*
* Copy the new maximum trace into the separate maximum-trace
@@ -1518,6 +1589,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)

/* record this tasks comm */
tracing_record_cmdline(tsk);
+ latency_fsnotify(tr);
}

/**
@@ -8550,8 +8622,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
create_trace_options_dir(tr);

#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
- trace_create_file("tracing_max_latency", 0644, d_tracer,
- &tr->max_latency, &tracing_max_lat_fops);
+ trace_create_maxlat_file(tr, d_tracer);
#endif

if (ftrace_create_function_files(tr, d_tracer))
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 005f08629b8b..4913ed1138aa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -16,6 +16,8 @@
#include <linux/trace_events.h>
#include <linux/compiler.h>
#include <linux/glob.h>
+#include <linux/irq_work.h>
+#include <linux/workqueue.h>

#ifdef CONFIG_FTRACE_SYSCALLS
#include <asm/unistd.h> /* For NR_SYSCALLS */
@@ -264,6 +266,11 @@ struct trace_array {
#endif
#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
unsigned long max_latency;
+#ifdef CONFIG_FSNOTIFY
+ struct dentry *d_max_latency;
+ struct work_struct fsnotify_work;
+ struct irq_work fsnotify_irqwork;
+#endif
#endif
struct trace_pid_list __rcu *filtered_pids;
/*
@@ -785,6 +792,17 @@ void update_max_tr_single(struct trace_array *tr,
struct task_struct *tsk, int cpu);
#endif /* CONFIG_TRACER_MAX_TRACE */

+#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
+ defined(CONFIG_FSNOTIFY)
+
+void latency_fsnotify(struct trace_array *tr);
+
+#else
+
+#define latency_fsnotify(tr) do { } while (0)
+
+#endif
+
#ifdef CONFIG_STACKTRACE
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
int pc);
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index fa95139445b2..9c379261ee89 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -254,8 +254,10 @@ static int get_sample(void)
trace_hwlat_sample(&s);

/* Keep a running maximum ever recorded hardware latency */
- if (sample > tr->max_latency)
+ if (sample > tr->max_latency) {
tr->max_latency = sample;
+ latency_fsnotify(tr);
+ }
}

out:
--
2.17.1