Re: [RFC][PATCH 01/18 v2] ftrace: Add hash list to save RCU unsafefunctions

From: Paul E. McKenney
Date: Sat Aug 31 2013 - 15:29:46 EST


On Sat, Aug 31, 2013 at 01:11:18AM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (Red Hat)" <rostedt@xxxxxxxxxxx>
>
> Some ftrace function tracing callbacks use RCU (perf), thus if
> it gets called from tracing a function outside of the RCU tracking,
> like in entering or leaving NO_HZ idle/userspace, the RCU read locks
> in the callback are useless.
>
> As normal function tracing does not use RCU, and function tracing
> happens to help debug RCU, we do not want to limit all callbacks
> from tracing these "unsafe" RCU functions. Instead, we create an
> infrastructure that will let us mark these unsafe RCU functions
> and we will only allow callbacks that explicitly say they do not
> use RCU to be called by these functions.
>
> This patch adds the infrastructure to create the hash of functions
> that are RCU unsafe. This is done with the FTRACE_UNSAFE_RCU()
> macro. It works like EXPORT_SYMBOL() does. Simply place the macro
> under the RCU unsafe function:
>
> void func(void)
> {
> [...]
> }
> FTRACE_UNSAFE_RCU(func);
>
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
> Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx>

Acked-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>

> ---
> include/asm-generic/vmlinux.lds.h | 10 +++
> include/linux/ftrace.h | 38 +++++++++++
> kernel/trace/ftrace.c | 135 ++++++++++++++++++++++++++++++++++---
> 3 files changed, 174 insertions(+), 9 deletions(-)
>
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index 69732d2..fdfddd2 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -93,6 +93,15 @@
> #define MCOUNT_REC()
> #endif
>
> +#ifdef CONFIG_FUNCTION_TRACER
> +#define FTRACE_UNSAFE_RCU() . = ALIGN(8); \
> + VMLINUX_SYMBOL(__start_ftrace_unsafe_rcu) = .; \
> + *(_ftrace_unsafe_rcu) \
> + VMLINUX_SYMBOL(__stop_ftrace_unsafe_rcu) = .;
> +#else
> +#define FTRACE_UNSAFE_RCU()
> +#endif
> +
> #ifdef CONFIG_TRACE_BRANCH_PROFILING
> #define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \
> *(_ftrace_annotated_branch) \
> @@ -479,6 +488,7 @@
> MEM_DISCARD(init.data) \
> KERNEL_CTORS() \
> MCOUNT_REC() \
> + FTRACE_UNSAFE_RCU() \
> *(.init.rodata) \
> FTRACE_EVENTS() \
> TRACE_SYSCALLS() \
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 9f15c00..1d17a82 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -92,6 +92,9 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
> * STUB - The ftrace_ops is just a place holder.
> * INITIALIZED - The ftrace_ops has already been initialized (first use time
> * register_ftrace_function() is called, it will initialized the ops)
> + * RCU_SAFE - The callback uses no rcu type locking. This allows the
> + * callback to be called in locations that RCU is not active.
> + * (like going into or coming out of idle NO_HZ)
> */
> enum {
> FTRACE_OPS_FL_ENABLED = 1 << 0,
> @@ -103,6 +106,7 @@ enum {
> FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6,
> FTRACE_OPS_FL_STUB = 1 << 7,
> FTRACE_OPS_FL_INITIALIZED = 1 << 8,
> + FTRACE_OPS_FL_RCU_SAFE = 1 << 9,
> };
>
> struct ftrace_ops {
> @@ -219,6 +223,40 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
> extern void ftrace_stub(unsigned long a0, unsigned long a1,
> struct ftrace_ops *op, struct pt_regs *regs);
>
> +/*
> + * In order to speed up boot, save both the name and the
> + * address of the function to find to add to hashes. The
> + * list of function mcount addresses are sorted by the address,
> + * but we still need to use the name to find the actual location
> + * as the mcount call is usually not at the address of the
> + * start of the function.
> + */
> +struct ftrace_func_finder {
> + const char *name;
> + unsigned long ip;
> +};
> +
> +/*
> + * For functions that are called when RCU is not tracking the CPU
> + * (like for entering or leaving NO_HZ mode, and RCU then ignores
> + * the CPU), they need to be marked with FTRACE_UNSAFE_RCU().
> + * This will prevent all function tracing callbacks from calling
> + * this function unless the callback explicitly states that it
> + * doesn't use RCU with the FTRACE_OPS_FL_RCU_SAFE flag.
> + *
> + * The usage of FTRACE_UNSAFE_RCU() is the same as EXPORT_SYMBOL().
> + * Just place the macro underneath the function that is unsafe.
> + */
> +#define FTRACE_UNSAFE_RCU(func) \
> + static struct ftrace_func_finder __ftrace_unsafe_rcu_##func \
> + __initdata = { \
> + .name = #func, \
> + .ip = (unsigned long)func, \
> + }; \
> + struct ftrace_func_finder *__ftrace_unsafe_rcu_ptr_##func \
> + __attribute__((__section__("_ftrace_unsafe_rcu"))) = \
> + &__ftrace_unsafe_rcu_##func
> +
> #else /* !CONFIG_FUNCTION_TRACER */
> /*
> * (un)register_ftrace_function must be a macro since the ops parameter
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index a6d098c..3750360 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1356,6 +1356,23 @@ ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
> static void
> ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
>
> +static int ftrace_convert_size_to_bits(int size)
> +{
> + int bits;
> +
> + /*
> + * Make the hash size about 1/2 the # found
> + */
> + for (size /= 2; size; size >>= 1)
> + bits++;
> +
> + /* Don't allocate too much */
> + if (bits > FTRACE_HASH_MAX_BITS)
> + bits = FTRACE_HASH_MAX_BITS;
> +
> + return bits;
> +}
> +
> static int
> ftrace_hash_move(struct ftrace_ops *ops, int enable,
> struct ftrace_hash **dst, struct ftrace_hash *src)
> @@ -1388,15 +1405,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
> goto out;
> }
>
> - /*
> - * Make the hash size about 1/2 the # found
> - */
> - for (size /= 2; size; size >>= 1)
> - bits++;
> -
> - /* Don't allocate too much */
> - if (bits > FTRACE_HASH_MAX_BITS)
> - bits = FTRACE_HASH_MAX_BITS;
> + bits = ftrace_convert_size_to_bits(size);
>
> ret = -ENOMEM;
> new_hash = alloc_ftrace_hash(bits);
> @@ -1486,6 +1495,74 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
> }
>
>
> +static __init int ftrace_find_ip(const void *a, const void *b)
> +{
> + const struct dyn_ftrace *key = a;
> + const struct dyn_ftrace *rec = b;
> +
> + if (key->ip == rec->ip)
> + return 0;
> +
> + if (key->ip < rec->ip) {
> + /* If previous is less than, then this is our func */
> + rec--;
> + if (rec->ip < key->ip)
> + return 0;
> + return -1;
> + }
> +
> + /* All else, we are greater */
> + return 1;
> +}
> +
> +/*
> + * Find the mcount caller location given the ip address of the
> + * function that contains it. As the mcount caller is usually
> + * after the mcount itself.
> + *
> + * Done for just core functions at boot.
> + */
> +static __init unsigned long ftrace_mcount_from_func(unsigned long ip)
> +{
> + struct ftrace_page *pg, *last_pg = NULL;
> + struct dyn_ftrace *rec;
> + struct dyn_ftrace key;
> +
> + key.ip = ip;
> +
> + for (pg = ftrace_pages_start; pg; last_pg = pg, pg = pg->next) {
> + if (ip >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
> + continue;
> +
> + /*
> + * Do the first record manually, as we need to check
> + * the previous record from the previous page
> + */
> + if (ip < pg->records[0].ip) {
> + /* First page? Then we found our record! */
> + if (!last_pg)
> + return pg->records[0].ip;
> +
> + rec = &last_pg->records[last_pg->index - 1];
> + if (rec->ip < ip)
> + return pg->records[0].ip;
> +
> + /* Not found */
> + return 0;
> + }
> +
> + rec = bsearch(&key, pg->records + 1, pg->index - 1,
> + sizeof(struct dyn_ftrace),
> + ftrace_find_ip);
> + if (rec)
> + return rec->ip;
> +
> + break;
> + }
> +
> + return 0;
> +}
> +
> static int ftrace_cmp_recs(const void *a, const void *b)
> {
> const struct dyn_ftrace *key = a;
> @@ -4211,6 +4288,44 @@ struct notifier_block ftrace_module_exit_nb = {
> .priority = INT_MIN, /* Run after anything that can remove kprobes */
> };
>
> +extern struct ftrace_func_finder *__start_ftrace_unsafe_rcu[];
> +extern struct ftrace_func_finder *__stop_ftrace_unsafe_rcu[];
> +
> +static struct ftrace_hash *ftrace_unsafe_rcu;
> +
> +static void __init create_unsafe_rcu_hash(void)
> +{
> + struct ftrace_func_finder *finder;
> + struct ftrace_func_finder **p;
> + unsigned long ip;
> + char str[KSYM_SYMBOL_LEN];
> + int count;
> +
> + count = __stop_ftrace_unsafe_rcu - __start_ftrace_unsafe_rcu;
> + if (!count)
> + return;
> +
> + count = ftrace_convert_size_to_bits(count);
> + ftrace_unsafe_rcu = alloc_ftrace_hash(count);
> +
> + for (p = __start_ftrace_unsafe_rcu;
> + p < __stop_ftrace_unsafe_rcu;
> + p++) {
> + finder = *p;
> +
> + ip = ftrace_mcount_from_func(finder->ip);
> + if (WARN_ON_ONCE(!ip))
> + continue;
> +
> + /* Make sure this is our ip */
> + kallsyms_lookup(ip, NULL, NULL, NULL, str);
> + if (WARN_ON_ONCE(strcmp(str, finder->name) != 0))
> + continue;
> +
> + add_hash_entry(ftrace_unsafe_rcu, ip);
> + }
> +}
> +
> extern unsigned long __start_mcount_loc[];
> extern unsigned long __stop_mcount_loc[];
>
> @@ -4250,6 +4365,8 @@ void __init ftrace_init(void)
> if (ret)
> pr_warning("Failed to register trace ftrace module exit notifier\n");
>
> + create_unsafe_rcu_hash();
> +
> set_ftrace_early_filters();
>
> return;
> --
> 1.7.10.4
>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/