[PATCH 11/15] static_call: Add inline static call infrastructure

From: Peter Zijlstra
Date: Wed Jun 05 2019 - 09:28:10 EST


From: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>

Add infrastructure for an arch-specific CONFIG_HAVE_STATIC_CALL_INLINE
option, which is a faster version of CONFIG_HAVE_STATIC_CALL. At
runtime, the static call sites are patched directly, rather than using
the out-of-line trampolines.

Compared to out-of-line static calls, the performance benefits are more
modest, but still measurable. Steven Rostedt did some tracepoint
measurements:

https://lkml.kernel.org/r/20181126155405.72b4f718@xxxxxxxxxxxxxxxxxx

This code is heavily inspired by the jump label code (aka "static
jumps"), as some of the concepts are very similar.

For more details, see the comments in include/linux/static_call.h.

Cc: x86@xxxxxxxxxx
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Julia Cartwright <julia@xxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
Cc: Jason Baron <jbaron@xxxxxxxxxx>
Cc: Rasmus Villemoes <linux@xxxxxxxxxxxxxxxxxx>
Cc: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Jiri Kosina <jkosina@xxxxxxx>
Cc: Edward Cree <ecree@xxxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: David Laight <David.Laight@xxxxxxxxxx>
Cc: Jessica Yu <jeyu@xxxxxxxxxx>
Cc: Nadav Amit <namit@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/c70ea8c00b93dadcb97b9d83659cf204121372d6.1547073843.git.jpoimboe@xxxxxxxxxx
---
arch/Kconfig | 4
include/asm-generic/vmlinux.lds.h | 7
include/linux/module.h | 10 +
include/linux/static_call.h | 63 +++++++
include/linux/static_call_types.h | 9 +
kernel/Makefile | 1
kernel/module.c | 5
kernel/static_call.c | 316 ++++++++++++++++++++++++++++++++++++++
8 files changed, 414 insertions(+), 1 deletion(-)
create mode 100644 kernel/static_call.c

--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -930,6 +930,10 @@ config LOCK_EVENT_COUNTS
config HAVE_STATIC_CALL
bool

+config HAVE_STATIC_CALL_INLINE
+ bool
+ depends on HAVE_STATIC_CALL
+
source "kernel/gcov/Kconfig"

source "scripts/gcc-plugins/Kconfig"
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -311,6 +311,12 @@
KEEP(*(__jump_table)) \
__stop___jump_table = .;

+#define STATIC_CALL_DATA \
+ . = ALIGN(8); \
+ __start_static_call_sites = .; \
+ KEEP(*(.static_call_sites)) \
+ __stop_static_call_sites = .;
+
/*
* Allow architectures to handle ro_after_init data on their
* own by defining an empty RO_AFTER_INIT_DATA.
@@ -320,6 +326,7 @@
__start_ro_after_init = .; \
*(.data..ro_after_init) \
JUMP_TABLE_DATA \
+ STATIC_CALL_DATA \
__end_ro_after_init = .;
#endif

--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -21,6 +21,7 @@
#include <linux/rbtree_latch.h>
#include <linux/error-injection.h>
#include <linux/tracepoint-defs.h>
+#include <linux/static_call_types.h>

#include <linux/percpu.h>
#include <asm/module.h>
@@ -472,6 +473,10 @@ struct module {
unsigned int num_ftrace_callsites;
unsigned long *ftrace_callsites;
#endif
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+ int num_static_call_sites;
+ struct static_call_site *static_call_sites;
+#endif

#ifdef CONFIG_LIVEPATCH
bool klp; /* Is this a livepatch module? */
@@ -728,6 +733,11 @@ static inline bool within_module(unsigne
{
return false;
}
+
+static inline bool within_module_init(unsigned long addr, const struct module *mod)
+{
+ return false;
+}

/* Get/put a kernel symbol (calls should be symmetric) */
#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -47,6 +47,12 @@
* Each static_call() site calls into a trampoline associated with the key.
* The trampoline has a direct branch to the default function. Updates to a
* key will modify the trampoline's branch destination.
+ *
+ * If the arch has CONFIG_HAVE_STATIC_CALL_INLINE, then the call sites
+ * themselves will be patched at runtime to call the functions directly,
+ * rather than calling through the trampoline. This requires objtool or a
+ * compiler plugin to detect all the static_call() sites and annotate them
+ * in the .static_call_sites section.
*/

#include <linux/types.h>
@@ -64,7 +70,62 @@ extern void arch_static_call_transform(v
extern typeof(func) STATIC_CALL_TRAMP(key)


-#if defined(CONFIG_HAVE_STATIC_CALL)
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+
+struct static_call_key {
+ void *func, *tramp;
+ /*
+ * List of modules (including vmlinux) and their call sites associated
+ * with this key.
+ */
+ struct list_head site_mods;
+};
+
+struct static_call_mod {
+ struct list_head list;
+ struct module *mod; /* for vmlinux, mod == NULL */
+ struct static_call_site *sites;
+};
+
+extern void __static_call_update(struct static_call_key *key, void *func);
+extern int static_call_mod_init(struct module *mod);
+
+#define DEFINE_STATIC_CALL(key, _func) \
+ DECLARE_STATIC_CALL(key, _func); \
+ struct static_call_key key = { \
+ .func = _func, \
+ .tramp = STATIC_CALL_TRAMP(key), \
+ .site_mods = LIST_HEAD_INIT(key.site_mods), \
+ }; \
+ ARCH_DEFINE_STATIC_CALL_TRAMP(key, _func)
+
+/*
+ * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from
+ * the symbol table so objtool can reference it when it generates the
+ * static_call_site structs.
+ */
+#define static_call(key, args...) \
+({ \
+ __ADDRESSABLE(key); \
+ STATIC_CALL_TRAMP(key)(args); \
+})
+
+#define static_call_update(key, func) \
+({ \
+ BUILD_BUG_ON(!__same_type(func, STATIC_CALL_TRAMP(key))); \
+ __static_call_update(&key, func); \
+})
+
+#define EXPORT_STATIC_CALL(key) \
+ EXPORT_SYMBOL(key); \
+ EXPORT_SYMBOL(STATIC_CALL_TRAMP(key))
+
+#define EXPORT_STATIC_CALL_GPL(key) \
+ EXPORT_SYMBOL_GPL(key); \
+ EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(key))
+
+
+#elif defined(CONFIG_HAVE_STATIC_CALL)

struct static_call_key {
void *func, *tramp;
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -10,4 +10,13 @@
#define STATIC_CALL_TRAMP(key) __PASTE(STATIC_CALL_TRAMP_PREFIX, key)
#define STATIC_CALL_TRAMP_STR(key) __stringify(STATIC_CALL_TRAMP(key))

+/*
+ * The static call site table needs to be created by external tooling (objtool
+ * or a compiler plugin).
+ */
+struct static_call_site {
+ s32 addr;
+ s32 key;
+};
+
#endif /* _STATIC_CALL_TYPES_H */
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o

obj-$(CONFIG_PERF_EVENTS) += events/

--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3117,6 +3117,11 @@ static int find_module_sections(struct m
sizeof(*mod->ei_funcs),
&mod->num_ei_funcs);
#endif
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+ mod->static_call_sites = section_objs(info, ".static_call_sites",
+ sizeof(*mod->static_call_sites),
+ &mod->num_static_call_sites);
+#endif
mod->extable = section_objs(info, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries);

--- /dev/null
+++ b/kernel/static_call.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/static_call.h>
+#include <linux/bug.h>
+#include <linux/smp.h>
+#include <linux/sort.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/processor.h>
+#include <asm/sections.h>
+
+extern struct static_call_site __start_static_call_sites[],
+ __stop_static_call_sites[];
+
+static bool static_call_initialized;
+
+#define STATIC_CALL_INIT 1UL
+
+/* mutex to protect key modules/sites */
+static DEFINE_MUTEX(static_call_mutex);
+
+static void static_call_lock(void)
+{
+ mutex_lock(&static_call_mutex);
+}
+
+static void static_call_unlock(void)
+{
+ mutex_unlock(&static_call_mutex);
+}
+
+static inline void *static_call_addr(struct static_call_site *site)
+{
+ return (void *)((long)site->addr + (long)&site->addr);
+}
+
+
+static inline struct static_call_key *static_call_key(const struct static_call_site *site)
+{
+ return (struct static_call_key *)
+ (((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT);
+}
+
+/* These assume the key is word-aligned. */
+static inline bool static_call_is_init(struct static_call_site *site)
+{
+ return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT;
+}
+
+static inline void static_call_set_init(struct static_call_site *site)
+{
+ site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) -
+ (long)&site->key;
+}
+
+static int static_call_site_cmp(const void *_a, const void *_b)
+{
+ const struct static_call_site *a = _a;
+ const struct static_call_site *b = _b;
+ const struct static_call_key *key_a = static_call_key(a);
+ const struct static_call_key *key_b = static_call_key(b);
+
+ if (key_a < key_b)
+ return -1;
+
+ if (key_a > key_b)
+ return 1;
+
+ return 0;
+}
+
+static void static_call_site_swap(void *_a, void *_b, int size)
+{
+ long delta = (unsigned long)_a - (unsigned long)_b;
+ struct static_call_site *a = _a;
+ struct static_call_site *b = _b;
+ struct static_call_site tmp = *a;
+
+ a->addr = b->addr - delta;
+ a->key = b->key - delta;
+
+ b->addr = tmp.addr + delta;
+ b->key = tmp.key + delta;
+}
+
+static inline void static_call_sort_entries(struct static_call_site *start,
+ struct static_call_site *stop)
+{
+ sort(start, stop - start, sizeof(struct static_call_site),
+ static_call_site_cmp, static_call_site_swap);
+}
+
+void __static_call_update(struct static_call_key *key, void *func)
+{
+ struct static_call_mod *site_mod;
+ struct static_call_site *site, *stop;
+
+ cpus_read_lock();
+ static_call_lock();
+
+ if (key->func == func)
+ goto done;
+
+ key->func = func;
+
+ /*
+ * If called before init, leave the call sites unpatched for now.
+ * In the meantime they'll continue to call the temporary trampoline.
+ */
+ if (!static_call_initialized)
+ goto done;
+
+ list_for_each_entry(site_mod, &key->site_mods, list) {
+ if (!site_mod->sites) {
+ /*
+ * This can happen if the static call key is defined in
+ * a module which doesn't use it.
+ */
+ continue;
+ }
+
+ stop = __stop_static_call_sites;
+
+#ifdef CONFIG_MODULES
+ if (site_mod->mod) {
+ stop = site_mod->mod->static_call_sites +
+ site_mod->mod->num_static_call_sites;
+ }
+#endif
+
+ for (site = site_mod->sites;
+ site < stop && static_call_key(site) == key; site++) {
+ void *site_addr = static_call_addr(site);
+ struct module *mod = site_mod->mod;
+
+ if (static_call_is_init(site)) {
+ /*
+ * Don't write to call sites which were in
+ * initmem and have since been freed.
+ */
+ if (!mod && system_state >= SYSTEM_RUNNING)
+ continue;
+ if (mod && !within_module_init((unsigned long)site_addr, mod))
+ continue;
+ }
+
+ if (!kernel_text_address((unsigned long)site_addr)) {
+ WARN_ONCE(1, "can't patch static call site at %pS",
+ site_addr);
+ continue;
+ }
+
+ arch_static_call_transform(site_addr, key->tramp, func);
+ }
+ }
+
+done:
+ static_call_unlock();
+ cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(__static_call_update);
+
+#ifdef CONFIG_MODULES
+
+static int static_call_add_module(struct module *mod)
+{
+ struct static_call_site *start = mod->static_call_sites;
+ struct static_call_site *stop = mod->static_call_sites +
+ mod->num_static_call_sites;
+ struct static_call_site *site;
+ struct static_call_key *key, *prev_key = NULL;
+ struct static_call_mod *site_mod;
+
+ if (start == stop)
+ return 0;
+
+ static_call_sort_entries(start, stop);
+
+ for (site = start; site < stop; site++) {
+ void *site_addr = static_call_addr(site);
+
+ if (within_module_init((unsigned long)site_addr, mod))
+ static_call_set_init(site);
+
+ key = static_call_key(site);
+ if (key != prev_key) {
+ prev_key = key;
+
+ site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+ if (!site_mod)
+ return -ENOMEM;
+
+ site_mod->mod = mod;
+ site_mod->sites = site;
+ list_add_tail(&site_mod->list, &key->site_mods);
+ }
+
+ arch_static_call_transform(site_addr, key->tramp, key->func);
+ }
+
+ return 0;
+}
+
+static void static_call_del_module(struct module *mod)
+{
+ struct static_call_site *start = mod->static_call_sites;
+ struct static_call_site *stop = mod->static_call_sites +
+ mod->num_static_call_sites;
+ struct static_call_site *site;
+ struct static_call_key *key, *prev_key = NULL;
+ struct static_call_mod *site_mod;
+
+ for (site = start; site < stop; site++) {
+ key = static_call_key(site);
+ if (key == prev_key)
+ continue;
+ prev_key = key;
+
+ list_for_each_entry(site_mod, &key->site_mods, list) {
+ if (site_mod->mod == mod) {
+ list_del(&site_mod->list);
+ kfree(site_mod);
+ break;
+ }
+ }
+ }
+}
+
+static int static_call_module_notify(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+ int ret = 0;
+
+ cpus_read_lock();
+ static_call_lock();
+
+ switch (val) {
+ case MODULE_STATE_COMING:
+ module_disable_ro(mod);
+ ret = static_call_add_module(mod);
+ module_enable_ro(mod, false);
+ if (ret) {
+ WARN(1, "Failed to allocate memory for static calls");
+ static_call_del_module(mod);
+ }
+ break;
+ case MODULE_STATE_GOING:
+ static_call_del_module(mod);
+ break;
+ }
+
+ static_call_unlock();
+ cpus_read_unlock();
+
+ return notifier_from_errno(ret);
+}
+
+static struct notifier_block static_call_module_nb = {
+ .notifier_call = static_call_module_notify,
+};
+
+#endif /* CONFIG_MODULES */
+
+static void __init static_call_init(void)
+{
+ struct static_call_site *start = __start_static_call_sites;
+ struct static_call_site *stop = __stop_static_call_sites;
+ struct static_call_site *site;
+
+ if (start == stop) {
+ pr_warn("WARNING: empty static call table\n");
+ return;
+ }
+
+ cpus_read_lock();
+ static_call_lock();
+
+ static_call_sort_entries(start, stop);
+
+ for (site = start; site < stop; site++) {
+ struct static_call_key *key = static_call_key(site);
+ void *site_addr = static_call_addr(site);
+
+ if (init_section_contains(site_addr, 1))
+ static_call_set_init(site);
+
+ if (list_empty(&key->site_mods)) {
+ struct static_call_mod *site_mod;
+
+ site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+ if (!site_mod) {
+ WARN(1, "Failed to allocate memory for static calls");
+ goto done;
+ }
+
+ site_mod->sites = site;
+ list_add_tail(&site_mod->list, &key->site_mods);
+ }
+
+ arch_static_call_transform(site_addr, key->tramp, key->func);
+ }
+
+ static_call_initialized = true;
+
+done:
+ static_call_unlock();
+ cpus_read_unlock();
+
+#ifdef CONFIG_MODULES
+ if (static_call_initialized)
+ register_module_notifier(&static_call_module_nb);
+#endif
+}
+early_initcall(static_call_init);