[PATCH v3] tracing: Add filter-direct option

From: Steven Rostedt
Date: Mon Dec 18 2023 - 10:32:36 EST


From: "Steven Rostedt (Google)" <rostedt@xxxxxxxxxxx>

Normally, when the filter is enabled, the event data is first copied
into a temporary buffer where the filtering logic is run. If the
filter passes and the event should be recorded, the event is copied
from the temporary buffer into the ring buffer. If the event is to be
discarded, it is simply dropped. If another event comes in via an
interrupt while the temporary buffer is busy, that event will bypass
the temporary buffer and be written directly into the ring buffer.
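
To make the two paths concrete, here is a minimal userspace model of
the logic described above. All names and the filter predicate are
made up for illustration; this is not the kernel's implementation:

#include <stdbool.h>
#include <stdio.h>

struct event { int id; char data[32]; };

static struct event temp_buf;	/* models the per CPU temp buffer */
static bool temp_buf_busy;	/* set while an event is staged   */

static struct event ring[16];	/* stand-in for the ring buffer   */
static int ring_len;

static bool filter_match(const struct event *e)
{
	return e->id != 0;	/* example predicate: drop id == 0 */
}

static void record_event(const struct event *e)
{
	if (!temp_buf_busy) {
		/* Normal path: stage, filter, then copy or drop. */
		temp_buf_busy = true;
		temp_buf = *e;
		if (filter_match(&temp_buf))
			ring[ring_len++] = temp_buf;	/* the extra copy */
		temp_buf_busy = false;
	} else {
		/*
		 * Nested path (e.g. an interrupt): the temp buffer is
		 * busy, so write directly into the ring buffer and
		 * discard afterwards if the filter rejects the event.
		 */
		ring[ring_len++] = *e;
		if (!filter_match(&ring[ring_len - 1]))
			ring_len--;
	}
}

int main(void)
{
	struct event kept = { .id = 1, .data = "kept" };
	struct event dropped = { .id = 0, .data = "dropped" };

	record_event(&kept);
	record_event(&dropped);
	printf("events in ring buffer: %d\n", ring_len);	/* 1 */
	return 0;
}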

The filter-direct option allows the user to disable the temporary
buffer. The option is disabled by default. When enabled, events are
always written directly into the ring buffer. This avoids the copy
when an event is recorded, but adds a bit more overhead on discard:
if another event interrupts the event that is to be discarded, the
discarded event cannot be removed from the ring buffer and is instead
converted to padding that the reader will skip. Padding still takes
up space in the ring buffer.
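
The discard situation can be sketched like this. This is only an
illustration of why padding exists, loosely modeled on what the real
ring_buffer_discard_commit() has to handle; the actual code also
deals with time stamps and per CPU commit state:

#include <stdio.h>

enum { EV_DATA, EV_PADDING };

struct rb_event { int type; int len; };

static struct rb_event rb[16];
static int rb_write;	/* index of the next write position */

static void discard_event(int idx)
{
	if (idx == rb_write - 1) {
		/* Still the newest event: just back up the write pointer. */
		rb_write--;
	} else {
		/*
		 * An interrupting event was committed after this one, so
		 * the slot cannot be reclaimed. Turn it into padding that
		 * the reader skips but that still occupies buffer space.
		 */
		rb[idx].type = EV_PADDING;
	}
}

int main(void)
{
	rb[rb_write++] = (struct rb_event){ .type = EV_DATA, .len = 8 };
	rb[rb_write++] = (struct rb_event){ .type = EV_DATA, .len = 8 };

	discard_event(0);	/* interrupted: becomes padding */
	discard_event(1);	/* newest: actually removed     */
	printf("write index %d, slot 0 type %d\n", rb_write, rb[0].type);
	return 0;
}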

This option is mainly intended for kselftests to stress test the ring
buffer discard logic.
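
For example, a test could flip the option through tracefs. A minimal
sketch, assuming tracefs is mounted at /sys/kernel/tracing and the
kernel was built with CONFIG_TRACE_FILTER_DIRECT:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* The option file appears only when the config is enabled. */
	int fd = open("/sys/kernel/tracing/options/filter-direct", O_WRONLY);

	if (fd < 0) {
		perror("filter-direct");
		return 1;
	}
	/* "1" forces direct writes; "0" restores the default path. */
	if (write(fd, "1", 1) != 1) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}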

Also fix some pre-existing whitespace (a side effect of editing this
file in vscode).

Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx>
---
Changes since v2: https://lore.kernel.org/linux-trace-kernel/20231215132502.1ae9c1a6@xxxxxxxxxxxxxxxxxxxx

- Changed the option name to filter-direct

- Added the config option CONFIG_TRACE_FILTER_DIRECT

- Moved the documentation from Documentation/ into the Kconfig help
  text, as the option is focused on being used for kselftest purposes

 kernel/trace/Kconfig | 28 ++++++++++++++++++++++++++++
 kernel/trace/trace.c | 39 ++++++++++++++++++++++++---------------
 kernel/trace/trace.h |  8 ++++++++
 3 files changed, 60 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61c541c36596..5362206e6e82 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -949,6 +949,34 @@ config TRACE_EVAL_MAP_FILE

 	  If unsure, say N.
 
+config TRACE_FILTER_DIRECT
+	bool "Add filter-direct option to test direct ring buffer filtering"
+	help
+	  Normally, when the filter is enabled, the event data is first
+	  copied into a temporary buffer where the filtering logic is
+	  run. If the filter passes and the event should be recorded,
+	  the event is copied from the temporary buffer into the ring
+	  buffer. If the event is to be discarded, it is simply dropped.
+	  If another event comes in via an interrupt while the temporary
+	  buffer is busy, that event bypasses the temporary buffer and
+	  is written directly into the ring buffer.
+
+	  Enabling this config creates a trace option "filter-direct"
+	  that, when enabled, disables the temporary buffer and always
+	  writes directly into the ring buffer. This avoids the copy
+	  when an event is recorded, but adds a bit more overhead on
+	  discard: if another event interrupts the event that is to be
+	  discarded, the discarded event cannot be removed from the
+	  ring buffer and is instead converted to padding that the
+	  reader skips. Padding still takes up space in the ring
+	  buffer.
+
+	  This option lets kselftests exercise the direct filtering
+	  path that is normally only taken when the temporary buffer
+	  is busy. It degrades the performance of trace filtering.
+
+	  Unless you are running kselftests, say N.
+
 config FTRACE_RECORD_RECURSION
 	bool "Record functions that recurse in function tracing"
 	depends on FUNCTION_TRACER
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 55dabee4c78b..36df0364cb9e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5398,6 +5398,8 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
 	return 0;
 }
 
+static int __tracing_set_filter_buffering(struct trace_array *tr, bool set);
+
 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
 {
 	int *map;
@@ -5451,6 +5453,11 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
 	if (mask == TRACE_ITER_FUNC_FORK)
 		ftrace_pid_follow_fork(tr, enabled);
 
+#ifdef CONFIG_TRACE_FILTER_DIRECT
+	if (mask == TRACE_ITER_FILTER_DIRECT)
+		__tracing_set_filter_buffering(tr, enabled);
+#endif
+
 	if (mask == TRACE_ITER_OVERWRITE) {
 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -6464,7 +6471,7 @@ static void tracing_set_nop(struct trace_array *tr)
 {
 	if (tr->current_trace == &nop_trace)
 		return;
-
+
 	tr->current_trace->enabled--;
 
 	if (tr->current_trace->reset)
@@ -7552,27 +7559,29 @@ u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_eve
 	return ring_buffer_event_time_stamp(buffer, rbe);
 }
 
-/*
- * Set or disable using the per CPU trace_buffer_event when possible.
- */
-int tracing_set_filter_buffering(struct trace_array *tr, bool set)
+static int __tracing_set_filter_buffering(struct trace_array *tr, bool set)
 {
-	int ret = 0;
-
-	mutex_lock(&trace_types_lock);
-
 	if (set && tr->no_filter_buffering_ref++)
-		goto out;
+		return 0;
 
 	if (!set) {
-		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
-			ret = -EINVAL;
-			goto out;
-		}
+		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
+			return -EINVAL;
 
 		--tr->no_filter_buffering_ref;
 	}
- out:
+	return 0;
+}
+
+/*
+ * Set or disable using the per CPU trace_buffer_event when possible.
+ */
+int tracing_set_filter_buffering(struct trace_array *tr, bool set)
+{
+	int ret;
+
+	mutex_lock(&trace_types_lock);
+	ret = __tracing_set_filter_buffering(tr, set);
 	mutex_unlock(&trace_types_lock);
 
 	return ret;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 79180aed13ee..53ab9ef847e9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1218,6 +1218,13 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 # define STACK_FLAGS
 #endif
 
+#ifdef CONFIG_TRACE_FILTER_DIRECT
+# define DIRECT_FILTER \
+	C(FILTER_DIRECT, "filter-direct"),
+#else
+# define DIRECT_FILTER
+#endif
+
 /*
  * trace_iterator_flags is an enumeration that defines bit
  * positions into trace_flags that controls the output.
@@ -1254,6 +1261,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 		FUNCTION_FLAGS					\
 		FGRAPH_FLAGS					\
 		STACK_FLAGS					\
+		DIRECT_FILTER					\
 		BRANCH_FLAGS
 
 /*
--
2.42.0