[PATCH 1/2] ring-buffer: Introduce precise nested timestamp sysfs attribute

From: Suresh E. Warrier
Date: Mon Apr 13 2015 - 22:32:05 EST


Introduce a new tunable nested_precise_ts that allows precise time
stamps to be recorded for nested trace writes.

The tunable is: /sys/kernel/debug/tracing/nested_precise_ts. By default,
it is zero and the behavior is disabled. Set the tunable to 1 in order
to get accurate timestamps.

Signed-off-by: Suresh Warrier <warrier@xxxxxxxxxxxxxxxxxx>
---
Documentation/trace/ftrace.txt | 8 +++++++
include/linux/ring_buffer.h | 9 ++++++++
kernel/trace/ring_buffer.c | 14 ++++++++++++
kernel/trace/trace.c | 51 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 82 insertions(+)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index bfe8c29..4d1e83b 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -264,6 +264,14 @@ of ftrace. Here is a list of some of the key files:
one will show only the first kernel function that is called
from user space.

+ nested_precise_ts:
+
+ When set, it will generate precise timestamps even for nested
+ writers which otherwise usually all get the same timestamp as
+ the initial preempted writer. However, this will require briefly
+ disabling interrupts on the local CPU for each trace event, which
+ can be expensive on some architectures.
+
printk_formats:

This is for tools that read the raw format files. If an event in
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 49a4d6f..682147a 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
+#include <linux/jump_label.h>

struct ring_buffer;
struct ring_buffer_iter;
@@ -97,6 +98,14 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
__ring_buffer_alloc((size), (flags), &__key); \
})

+extern struct static_key __precise_nested_write_ts;
+static inline bool rb_precise_nested_write_ts(void)
+{
+ return static_key_false(&__precise_nested_write_ts);
+}
+void rb_enable_precise_nested_write_ts(void);
+void rb_disable_precise_nested_write_ts(void);
+
int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3d9fee3..c9b3005 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2520,6 +2520,20 @@ static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
}
}

+struct static_key __precise_nested_write_ts;
+
+void rb_enable_precise_nested_write_ts(void)
+{
+ if (!rb_precise_nested_write_ts())
+ static_key_slow_inc(&__precise_nested_write_ts);
+}
+
+void rb_disable_precise_nested_write_ts(void)
+{
+ if (rb_precise_nested_write_ts())
+ static_key_slow_dec(&__precise_nested_write_ts);
+}
+
static struct ring_buffer_event *
rb_reserve_next_event(struct ring_buffer *buffer,
struct ring_buffer_per_cpu *cpu_buffer,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 18cdf91..f27dcde 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3486,6 +3486,7 @@ static const char readme_msg[] =
"\t\t\t Remove sub-buffer with rmdir\n"
" trace_options\t\t- Set format or modify how tracing happens\n"
"\t\t\t Disable an option by adding a suffix 'no' to the option name\n"
+ " nested_precise_ts\t- Enable/disable precise timestamps for nested writes\n"
#ifdef CONFIG_DYNAMIC_FTRACE
"\n available_filter_functions - list of functions that can be filtered on\n"
" set_ftrace_filter\t- echo function name in here to only trace these functions\n"
@@ -4680,6 +4681,45 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
return ret;
}

+static ssize_t
+tracing_nested_precise_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ int r;
+
+ r = rb_precise_nested_write_ts() ? 1 : 0;
+ r = sprintf(buf, "%d\n", r);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+tracing_nested_precise_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ struct ring_buffer *buffer = tr->trace_buffer.buffer;
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ if (buffer) {
+ mutex_lock(&trace_types_lock);
+ if (val)
+ rb_enable_precise_nested_write_ts();
+ else
+ rb_disable_precise_nested_write_ts();
+ mutex_unlock(&trace_types_lock);
+ }
+
+ (*ppos)++;
+
+ return cnt;
+}
struct ftrace_buffer_info {
struct trace_iterator iter;
void *spare;
@@ -4910,6 +4950,14 @@ static const struct file_operations trace_clock_fops = {
.write = tracing_clock_write,
};

+static const struct file_operations tracing_nested_precise_fops = {
+ .open = tracing_open_generic_tr,
+ .read = tracing_nested_precise_read,
+ .write = tracing_nested_precise_write,
+ .llseek = generic_file_llseek,
+ .release = tracing_release_generic_tr,
+};
+
#ifdef CONFIG_TRACER_SNAPSHOT
static const struct file_operations snapshot_fops = {
.open = tracing_snapshot_open,
@@ -6160,6 +6208,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
trace_create_file("trace_clock", 0644, d_tracer, tr,
&trace_clock_fops);

+ trace_create_file("nested_precise_ts", 0644, d_tracer,
+ tr, &tracing_nested_precise_fops);
+
trace_create_file("tracing_on", 0644, d_tracer,
tr, &rb_simple_fops);

-- 1.8.3.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/