[RFC][PATCH 1/4] tracepoint: Give priority to probes of tracepoints

From: Steven Rostedt
Date: Thu Oct 22 2015 - 12:10:35 EST


From: "Steven Rostedt (Red Hat)" <rostedt@xxxxxxxxxxx>

In order to guarantee that a probe will be called before other probes that
are attached to a tracepoint, there needs to be a mechanism to provide
priority of one probe over the others.

Adding a prio field to the struct tracepoint_func, which lets the probes be
sorted by the priority set in the structure. If no priority is specified,
then a priority of 10 is given (this is a macro, and perhaps may be changed
in the future).

Now probes may be added to affect other probes that are attached to a
tracepoint with a guaranteed order.

One use case would be to allow tracing of tracepoints be able to filter by
pid. A special (higher priority probe) may be added to the sched_switch
tracepoint and set the necessary flags of the other tracepoints to notify
them if they should be traced or not. In case a tracepoint is enabled at the
sched_switch tracepoint too, the order of the two are not random.

Cc: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
---
include/linux/tracepoint.h | 13 ++++++++++
kernel/tracepoint.c | 61 +++++++++++++++++++++++++++++++++++++---------
2 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index afada369c5b7..6b79537a42b1 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -26,6 +26,7 @@ struct notifier_block;
struct tracepoint_func {
void *func;
void *data;
+ int prio;
};

struct tracepoint {
@@ -42,9 +43,14 @@ struct trace_enum_map {
unsigned long enum_value;
};

+#define TRACEPOINT_DEFAULT_PRIO 10
+
extern int
tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
extern int
+tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data,
+ int prio);
+extern int
tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data);
extern void
for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
@@ -207,6 +213,13 @@ extern void syscall_unregfunc(void);
(void *)probe, data); \
} \
static inline int \
+ register_trace_prio_##name(void (*probe)(data_proto), void *data,\
+ int prio) \
+ { \
+ return tracepoint_probe_register_prio(&__tracepoint_##name, \
+ (void *)probe, data, prio); \
+ } \
+ static inline int \
unregister_trace_##name(void (*probe)(data_proto), void *data) \
{ \
return tracepoint_probe_unregister(&__tracepoint_##name,\
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 3490407dc7b7..ecd536de603a 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -91,11 +91,13 @@ static void debug_print_probes(struct tracepoint_func *funcs)
printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func);
}

-static struct tracepoint_func *func_add(struct tracepoint_func **funcs,
- struct tracepoint_func *tp_func)
+static struct tracepoint_func *
+func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
+ int prio)
{
- int nr_probes = 0;
struct tracepoint_func *old, *new;
+ int nr_probes = 0;
+ int pos = -1;

if (WARN_ON(!tp_func->func))
return ERR_PTR(-EINVAL);
@@ -104,18 +106,33 @@ static struct tracepoint_func *func_add(struct tracepoint_func **funcs,
old = *funcs;
if (old) {
/* (N -> N+1), (N != 0, 1) probes */
- for (nr_probes = 0; old[nr_probes].func; nr_probes++)
+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
+ /* Insert before probes of lower priority */
+ if (pos < 0 && old[nr_probes].prio < prio)
+ pos = nr_probes;
if (old[nr_probes].func == tp_func->func &&
old[nr_probes].data == tp_func->data)
return ERR_PTR(-EEXIST);
+ }
}
/* + 2 : one for new probe, one for NULL func */
new = allocate_probes(nr_probes + 2);
if (new == NULL)
return ERR_PTR(-ENOMEM);
- if (old)
- memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
- new[nr_probes] = *tp_func;
+ if (old) {
+ if (pos < 0) {
+ pos = nr_probes;
+ memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
+ } else {
+ /* Copy higher priority probes ahead of the new probe */
+ memcpy(new, old, pos * sizeof(struct tracepoint_func));
+ /* Copy the rest after it. */
+ memcpy(new + pos + 1, old + pos,
+ (nr_probes - pos) * sizeof(struct tracepoint_func));
+ }
+ } else
+ pos = 0;
+ new[pos] = *tp_func;
new[nr_probes + 1].func = NULL;
*funcs = new;
debug_print_probes(*funcs);
@@ -174,7 +191,7 @@ static void *func_remove(struct tracepoint_func **funcs,
* Add the probe function to a tracepoint.
*/
static int tracepoint_add_func(struct tracepoint *tp,
- struct tracepoint_func *func)
+ struct tracepoint_func *func, int prio)
{
struct tracepoint_func *old, *tp_funcs;

@@ -183,7 +200,7 @@ static int tracepoint_add_func(struct tracepoint *tp,

tp_funcs = rcu_dereference_protected(tp->funcs,
lockdep_is_held(&tracepoints_mutex));
- old = func_add(&tp_funcs, func);
+ old = func_add(&tp_funcs, func, prio);
if (IS_ERR(old)) {
WARN_ON_ONCE(1);
return PTR_ERR(old);
@@ -240,6 +257,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
* @tp: tracepoint
* @probe: probe handler
* @data: tracepoint data
+ * @prio: priority of this function over other registered functions
*
* Returns 0 if ok, error value on error.
* Note: if @tp is within a module, the caller is responsible for
@@ -247,7 +265,8 @@ static int tracepoint_remove_func(struct tracepoint *tp,
* performed either with a tracepoint module going notifier, or from
* within module exit functions.
*/
-int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
+int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
+ void *data, int prio)
{
struct tracepoint_func tp_func;
int ret;
@@ -255,10 +274,30 @@ int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
mutex_lock(&tracepoints_mutex);
tp_func.func = probe;
tp_func.data = data;
- ret = tracepoint_add_func(tp, &tp_func);
+ tp_func.prio = prio;
+ ret = tracepoint_add_func(tp, &tp_func, prio);
mutex_unlock(&tracepoints_mutex);
return ret;
}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio);
+
+/**
+ * tracepoint_probe_register - Connect a probe to a tracepoint
+ * @tp: tracepoint
+ * @probe: probe handler
+ * @data: tracepoint data
+ * @prio: priority of this function over other registered functions
+ *
+ * Returns 0 if ok, error value on error.
+ * Note: if @tp is within a module, the caller is responsible for
+ * unregistering the probe before the module is gone. This can be
+ * performed either with a tracepoint module going notifier, or from
+ * within module exit functions.
+ */
+int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
+{
+ return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO);
+}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);

/**
--
2.6.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/