[PATCH v2 2/4] static_call: Add static call infrastructure

From: Josh Poimboeuf
Date: Mon Nov 26 2018 - 08:55:16 EST


Add a static call infrastructure. Static calls use code patching to
hard-code function pointers into direct branch instructions. They give
the flexibility of function pointers, but with improved performance.
This is especially important for cases where retpolines would otherwise
be used, as retpolines can significantly impact performance.

The concept and code are an extension of previous work done by Ard
Biesheuvel and Steven Rostedt:

https://lkml.kernel.org/r/20181005081333.15018-1-ard.biesheuvel@xxxxxxxxxx
https://lkml.kernel.org/r/20181006015110.653946300@xxxxxxxxxxx

This code is also heavily inspired by the jump label code (aka "static
jumps"), as some of the concepts are very similar.

There are three implementations, depending on arch support:

1) inline: patched call sites (CONFIG_HAVE_STATIC_CALL_INLINE)
2) out-of-line: patched trampolines (CONFIG_HAVE_STATIC_CALL_OUTLINE)
3) basic function pointers

For more details, see the comments in include/linux/static_call.h.

Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
arch/Kconfig | 10 +
include/asm-generic/vmlinux.lds.h | 11 +
include/linux/module.h | 10 +
include/linux/static_call.h | 202 +++++++++++++++++
include/linux/static_call_types.h | 19 ++
kernel/Makefile | 1 +
kernel/module.c | 5 +
kernel/static_call.c | 350 ++++++++++++++++++++++++++++++
8 files changed, 608 insertions(+)
create mode 100644 include/linux/static_call.h
create mode 100644 include/linux/static_call_types.h
create mode 100644 kernel/static_call.c

diff --git a/arch/Kconfig b/arch/Kconfig
index e1e540ffa979..4474f2958e03 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -879,6 +879,16 @@ config HAVE_ARCH_PREL32_RELOCATIONS
architectures, and don't require runtime relocation on relocatable
kernels.

+# Arch support for inline static calls (patched call sites).
+config HAVE_STATIC_CALL_INLINE
+ bool
+
+# Arch support for out-of-line static calls (patched trampolines).
+config HAVE_STATIC_CALL_OUTLINE
+ bool
+
+# Set automatically whenever either implementation above is available.
+config HAVE_STATIC_CALL
+ def_bool y
+ depends on HAVE_STATIC_CALL_INLINE || HAVE_STATIC_CALL_OUTLINE
+
source "kernel/gcov/Kconfig"

source "scripts/gcc-plugins/Kconfig"
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3d7a6a9c2370..f2729831c8b8 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -320,6 +320,7 @@
__start_ro_after_init = .; \
*(.data..ro_after_init) \
JUMP_TABLE_DATA \
+ STATIC_CALL_SITES \
__end_ro_after_init = .;
#endif

@@ -725,6 +726,16 @@
#define BUG_TABLE
#endif

+/*
+ * Gather every .static_call_sites input section into one 8-byte aligned
+ * table bracketed by __start_static_call_sites/__stop_static_call_sites.
+ * Expands to nothing when inline static calls are not configured.
+ */
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+#define STATIC_CALL_SITES \
+ . = ALIGN(8); \
+ __start_static_call_sites = .; \
+ KEEP(*(.static_call_sites)) \
+ __stop_static_call_sites = .;
+#else
+#define STATIC_CALL_SITES
+#endif
+
#ifdef CONFIG_UNWINDER_ORC
#define ORC_UNWIND_TABLE \
. = ALIGN(4); \
diff --git a/include/linux/module.h b/include/linux/module.h
index fce6b4335e36..d7c575759931 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -21,6 +21,7 @@
#include <linux/rbtree_latch.h>
#include <linux/error-injection.h>
#include <linux/tracepoint-defs.h>
+#include <linux/static_call_types.h>

#include <linux/percpu.h>
#include <asm/module.h>
@@ -450,6 +451,10 @@ struct module {
unsigned int num_ftrace_callsites;
unsigned long *ftrace_callsites;
#endif
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+ int num_static_call_sites;
+ struct static_call_site *static_call_sites;
+#endif

#ifdef CONFIG_LIVEPATCH
bool klp; /* Is this a livepatch module? */
@@ -682,6 +687,11 @@ static inline bool is_module_text_address(unsigned long addr)
return false;
}

+/* Stub variant: no address is ever reported as being inside a module's init region. */
+static inline bool within_module_init(unsigned long addr, const struct module *mod)
+{
+ return false;
+}
+
/* Get/put a kernel symbol (calls should be symmetric) */
#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
#define symbol_put(x) do { } while (0)
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
new file mode 100644
index 000000000000..c8d0da1ef6b2
--- /dev/null
+++ b/include/linux/static_call.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_STATIC_CALL_H
+#define _LINUX_STATIC_CALL_H
+
+/*
+ * Static call support
+ *
+ * Static calls use code patching to hard-code function pointers into direct
+ * branch instructions. They give the flexibility of function pointers, but
+ * with improved performance. This is especially important for cases where
+ * retpolines would otherwise be used, as retpolines can significantly impact
+ * performance.
+ *
+ *
+ * API overview:
+ *
+ * DECLARE_STATIC_CALL(key, func);
+ * DEFINE_STATIC_CALL(key, func);
+ * static_call(key, args...);
+ * static_call_update(key, func);
+ *
+ *
+ * Usage example:
+ *
+ * # Start with the following functions (with identical prototypes):
+ * int func_a(int arg1, int arg2);
+ * int func_b(int arg1, int arg2);
+ *
+ * # Define a 'my_key' reference, associated with func_a() by default
+ * DEFINE_STATIC_CALL(my_key, func_a);
+ *
+ * # Call func_a()
+ * static_call(my_key, arg1, arg2);
+ *
+ * # Update 'my_key' to point to func_b()
+ * static_call_update(my_key, func_b);
+ *
+ * # Call func_b()
+ * static_call(my_key, arg1, arg2);
+ *
+ *
+ * Implementation details:
+ *
+ * There are three different implementations:
+ *
+ * 1) Inline static calls (patched call sites)
+ *
+ * This requires objtool, which detects all the static_call() sites and
+ * annotates them in the '.static_call_sites' section. By default, the call
+ * sites will call into a temporary per-key trampoline which has an indirect
+ * branch to the current destination function associated with the key.
+ * During system boot (or module init), all call sites are patched to call
+ * their destination functions directly. Updates to a key will patch all
+ * call sites associated with that key.
+ *
+ * 2) Out-of-line static calls (patched trampolines)
+ *
+ * Each static_call() site calls into a permanent trampoline associated with
+ * the key. The trampoline has a direct branch to the default function.
+ * Updates to a key will modify the direct branch in the key's trampoline.
+ *
+ * 3) Generic implementation
+ *
+ * This is the default implementation if the architecture hasn't implemented
+ * static calls (either inline or out-of-line). In this case, a basic
+ * function pointer is used.
+ */
+
+#include <linux/types.h>
+#include <linux/cpu.h>
+#include <linux/static_call_types.h>
+
+#ifdef CONFIG_HAVE_STATIC_CALL
+#include <asm/static_call.h>
+extern void arch_static_call_transform(void *site, void *tramp, void *func);
+#endif
+
+
+/*
+ * Declare a static call defined elsewhere: the key object itself plus the
+ * per-key trampoline symbol, which is given the same type as @func.
+ */
+#define DECLARE_STATIC_CALL(key, func) \
+ extern struct static_call_key key; \
+ extern typeof(func) STATIC_CALL_TRAMP(key)
+
+
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+
+/*
+ * Key for the inline implementation.
+ * @func is the current destination function and @tramp the key's trampoline;
+ * both are handed to arch_static_call_transform() when call sites are patched.
+ */
+struct static_call_key {
+ void *func, *tramp;
+ /*
+ * List of modules (including vmlinux) and their call sites associated
+ * with this key.
+ */
+ struct list_head site_mods;
+};
+
+/*
+ * One entry on a key's site_mods list: the module (or vmlinux) using the key
+ * and the first of that key's entries in the module's sorted call-site table.
+ */
+struct static_call_mod {
+ struct list_head list;
+ struct module *mod; /* for vmlinux, mod == NULL */
+ struct static_call_site *sites;
+};
+
+extern void arch_static_call_defuse_tramp(void *site, void *tramp);
+extern void __static_call_update(struct static_call_key *key, void *func);
+extern int static_call_mod_init(struct module *mod);
+
+/*
+ * Define a static call for the inline implementation: declare it, define the
+ * key with @_func as the initial destination and an empty site_mods list,
+ * then let the architecture emit the key's trampoline.
+ */
+#define DEFINE_STATIC_CALL(key, _func) \
+ DECLARE_STATIC_CALL(key, _func); \
+ struct static_call_key key = { \
+ .func = _func, \
+ .tramp = STATIC_CALL_TRAMP(key), \
+ .site_mods = LIST_HEAD_INIT(key.site_mods), \
+ }; \
+ ARCH_DEFINE_STATIC_CALL_TRAMP(key, _func)
+
+/*
+ * Call through the key's trampoline symbol with @args; the value of the
+ * expression is the value of that call.
+ *
+ * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from
+ * the symbol table so objtool can reference it when it generates the
+ * static_call_site structs.
+ */
+#define static_call(key, args...) \
+({ \
+ __ADDRESSABLE(key); \
+ STATIC_CALL_TRAMP(key)(args); \
+})
+
+/*
+ * Point @key at @func. The build fails if @func's type differs from the
+ * type the key's trampoline was declared with.
+ */
+#define static_call_update(key, func) \
+({ \
+ BUILD_BUG_ON(!__same_type(func, STATIC_CALL_TRAMP(key))); \
+ __static_call_update(&key, func); \
+})
+
+/* Export both the key and its trampoline symbol. */
+#define EXPORT_STATIC_CALL(key) \
+ EXPORT_SYMBOL(key); \
+ EXPORT_SYMBOL(STATIC_CALL_TRAMP(key))
+
+/* GPL-only flavour of EXPORT_STATIC_CALL(). */
+#define EXPORT_STATIC_CALL_GPL(key) \
+ EXPORT_SYMBOL_GPL(key); \
+ EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(key))
+
+
+#elif defined(CONFIG_HAVE_STATIC_CALL_OUTLINE)
+
+/*
+ * Key for the out-of-line implementation: the destination function recorded
+ * at definition time and the trampoline that is patched on update.
+ */
+struct static_call_key {
+ void *func, *tramp;
+};
+
+/*
+ * Define a static call for the out-of-line implementation: declare it,
+ * define the key with @_func as the initial destination, then let the
+ * architecture emit the key's trampoline targeting @_func.
+ *
+ * The trampoline must be built from the macro parameter @_func; a bare
+ * 'func' token would refer to whatever identifier of that name happens to be
+ * visible at the definition site.
+ */
+#define DEFINE_STATIC_CALL(key, _func) \
+	DECLARE_STATIC_CALL(key, _func); \
+	struct static_call_key key = { \
+		.func = _func, \
+		.tramp = STATIC_CALL_TRAMP(key), \
+	}; \
+	ARCH_DEFINE_STATIC_CALL_TRAMP(key, _func)
+
+/* Call the key's trampoline directly; the trampoline branches to the destination. */
+#define static_call(key, args...) STATIC_CALL_TRAMP(key)(args)
+
+/*
+ * Re-point the trampoline of the key that @key points to at @func, holding
+ * the CPU hotplug read lock around the patching. @key is a pointer to the
+ * key; it is parenthesized so that an argument such as '&my_key' is
+ * dereferenced as a whole.
+ *
+ * NOTE(review): key->func is not refreshed here, unlike the inline
+ * implementation -- confirm whether it should be.
+ */
+#define __static_call_update(key, func) \
+({ \
+	cpus_read_lock(); \
+	arch_static_call_transform(NULL, (key)->tramp, func); \
+	cpus_read_unlock(); \
+})
+
+/*
+ * Point @key at @func. The build fails if @func's type differs from the
+ * type the key's trampoline was declared with; otherwise the trampoline is
+ * patched via __static_call_update().
+ */
+#define static_call_update(key, func) \
+({ \
+	BUILD_BUG_ON(!__same_type(func, STATIC_CALL_TRAMP(key))); \
+	__static_call_update(&key, func); \
+})
+
+/*
+ * Export the key's trampoline symbol.
+ * NOTE(review): unlike the inline variant, the key itself is not exported
+ * here -- confirm whether static_call_update() is meant to work from modules.
+ */
+#define EXPORT_STATIC_CALL(key) \
+ EXPORT_SYMBOL(STATIC_CALL_TRAMP(key))
+
+/* GPL-only flavour of EXPORT_STATIC_CALL(). */
+#define EXPORT_STATIC_CALL_GPL(key) \
+ EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(key))
+
+
+#else /* Generic implementation */
+
+/* Key for the generic implementation: just the current destination pointer. */
+struct static_call_key {
+ void *func;
+};
+
+/*
+ * Define a static call for the generic implementation: declare it and define
+ * the key with @_func as the initial destination. No trampoline is emitted;
+ * the trampoline declaration only supplies the function type.
+ */
+#define DEFINE_STATIC_CALL(key, _func) \
+ DECLARE_STATIC_CALL(key, _func); \
+ struct static_call_key key = { \
+ .func = _func, \
+ }
+
+/*
+ * Call key.func through a pointer cast to the function type of the key's
+ * trampoline declaration.
+ */
+#define static_call(key, args...) \
+ ((typeof(STATIC_CALL_TRAMP(key))*)(key.func))(args)
+
+/*
+ * Store @_func as the new destination of the key that @key points to.
+ * @key is parenthesized so that an argument such as '&my_key' is
+ * dereferenced as a whole.
+ */
+#define __static_call_update(key, _func) \
+	WRITE_ONCE((key)->func, _func)
+
+/*
+ * Point @key at @func. The build fails if @func's type differs from the
+ * type the key's trampoline was declared with. Wrapped in a statement
+ * expression so the macro behaves as a single statement, e.g. under an
+ * unbraced 'if'.
+ */
+#define static_call_update(key, func) \
+({ \
+	BUILD_BUG_ON(!__same_type(func, STATIC_CALL_TRAMP(key))); \
+	__static_call_update(&key, func); \
+})
+
+/* Generic implementation: only the key object is exported. */
+#define EXPORT_STATIC_CALL(key) EXPORT_SYMBOL(key)
+#define EXPORT_STATIC_CALL_GPL(key) EXPORT_SYMBOL_GPL(key)
+
+#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
+
+#endif /* _LINUX_STATIC_CALL_H */
diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
new file mode 100644
index 000000000000..6859b208de6e
--- /dev/null
+++ b/include/linux/static_call_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _STATIC_CALL_TYPES_H
+#define _STATIC_CALL_TYPES_H
+
+#include <linux/stringify.h>
+
+/* Symbol-name prefix of every per-key trampoline, as a token and as a string. */
+#define STATIC_CALL_TRAMP_PREFIX ____static_call_tramp_
+#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX)
+
+/* Trampoline symbol for @key (the prefix pasted onto the key name), as a token and as a string. */
+#define STATIC_CALL_TRAMP(key) __PASTE(STATIC_CALL_TRAMP_PREFIX, key)
+#define STATIC_CALL_TRAMP_STR(key) __stringify(STATIC_CALL_TRAMP(key))
+
+/*
+ * The static call site table is created by objtool.
+ *
+ * Both members are 32-bit offsets relative to their own address: the core
+ * code resolves them by adding the address of the member to its value.
+ */
+struct static_call_site {
+ s32 addr;
+ s32 key;
+};
+
+#endif /* _STATIC_CALL_TYPES_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 7343b3a9bff0..88bc7fa14eb8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/module.c b/kernel/module.c
index 49a405891587..ecad0ee4ffb5 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3121,6 +3121,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
mod->ei_funcs = section_objs(info, "_error_injection_whitelist",
sizeof(*mod->ei_funcs),
&mod->num_ei_funcs);
+#endif
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+ mod->static_call_sites = section_objs(info, ".static_call_sites",
+ sizeof(*mod->static_call_sites),
+ &mod->num_static_call_sites);
#endif
mod->extable = section_objs(info, "__ex_table",
sizeof(*mod->extable), &mod->num_exentries);
diff --git a/kernel/static_call.c b/kernel/static_call.c
new file mode 100644
index 000000000000..88996ebe96e2
--- /dev/null
+++ b/kernel/static_call.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/static_call.h>
+#include <linux/bug.h>
+#include <linux/smp.h>
+#include <linux/sort.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/processor.h>
+#include <asm/sections.h>
+
+/* Bounds of the vmlinux call-site table. */
+extern struct static_call_site __start_static_call_sites[],
+ __stop_static_call_sites[];
+
+/* Set once static_call_init() has patched the vmlinux call sites. */
+static bool static_call_initialized;
+
+/* Flag kept in bit 0 of a site's resolved key reference: the site is in init text. */
+#define STATIC_CALL_INIT 1UL
+
+/* mutex to protect key modules/sites */
+static DEFINE_MUTEX(static_call_mutex);
+
+/* Acquire static_call_mutex. */
+static void static_call_lock(void)
+{
+ mutex_lock(&static_call_mutex);
+}
+
+/* Release static_call_mutex. */
+static void static_call_unlock(void)
+{
+ mutex_unlock(&static_call_mutex);
+}
+
+/* Resolve a site's self-relative 'addr' offset into the call site's address. */
+static inline void *static_call_addr(struct static_call_site *site)
+{
+	long base = (long)&site->addr;
+	long offset = (long)site->addr;
+
+	return (void *)(offset + base);
+}
+
+
+/* Resolve a site's self-relative 'key' offset into its key, ignoring the init flag. */
+static inline struct static_call_key *static_call_key(const struct static_call_site *site)
+{
+	long key_ref = (long)site->key + (long)&site->key;
+
+	/* Bit 0 may carry STATIC_CALL_INIT rather than an address bit. */
+	key_ref &= ~STATIC_CALL_INIT;
+
+	return (struct static_call_key *)key_ref;
+}
+
+/*
+ * The init-flag helpers assume the key is word-aligned, so bit 0 of its
+ * address is free to hold STATIC_CALL_INIT.
+ *
+ * Report whether @site has been flagged as living in init text.
+ */
+static inline bool static_call_is_init(struct static_call_site *site)
+{
+	unsigned long key_ref = (long)site->key + (long)&site->key;
+
+	return (key_ref & STATIC_CALL_INIT) != 0;
+}
+
+/* Flag @site as living in init text by setting bit 0 of its key reference. */
+static inline void static_call_set_init(struct static_call_site *site)
+{
+	unsigned long flagged = (long)static_call_key(site) | STATIC_CALL_INIT;
+
+	/* Store it back in self-relative form. */
+	site->key = flagged - (long)&site->key;
+}
+
+/* sort() comparator: order call sites by the address of the key they refer to. */
+static int static_call_site_cmp(const void *_a, const void *_b)
+{
+	const struct static_call_key *lhs = static_call_key(_a);
+	const struct static_call_key *rhs = static_call_key(_b);
+
+	if (lhs == rhs)
+		return 0;
+
+	return lhs < rhs ? -1 : 1;
+}
+
+/*
+ * sort() swap callback. The members are offsets relative to their own
+ * address, so each value is adjusted by the distance between the two slots
+ * as it moves. @size is unused.
+ */
+static void static_call_site_swap(void *_a, void *_b, int size)
+{
+	struct static_call_site *first = _a;
+	struct static_call_site *second = _b;
+	long delta = (unsigned long)_a - (unsigned long)_b;
+	struct static_call_site saved = *second;
+
+	second->addr = first->addr + delta;
+	second->key = first->key + delta;
+
+	first->addr = saved.addr - delta;
+	first->key = saved.key - delta;
+}
+
+/* Sort the table [start, stop) by key so each key's call sites are contiguous. */
+static inline void static_call_sort_entries(struct static_call_site *start,
+					    struct static_call_site *stop)
+{
+	size_t nr_sites = stop - start;
+
+	sort(start, nr_sites, sizeof(*start), static_call_site_cmp,
+	     static_call_site_swap);
+}
+
+/*
+ * Make @func the destination of @key and re-patch the key's recorded call
+ * sites.
+ *
+ * Runs under cpus_read_lock() and static_call_mutex. Nothing is done if
+ * @func is already the key's destination. Before static_call_init() has
+ * completed only key->func is updated.
+ */
+void __static_call_update(struct static_call_key *key, void *func)
+{
+ struct static_call_mod *site_mod;
+ struct static_call_site *site, *stop;
+
+ cpus_read_lock();
+ static_call_lock();
+
+ if (key->func == func)
+ goto done;
+
+ key->func = func;
+
+ /*
+ * If called before init, leave the call sites unpatched for now.
+ * In the meantime they'll continue to call the temporary trampoline.
+ */
+ if (!static_call_initialized)
+ goto done;
+
+ list_for_each_entry(site_mod, &key->site_mods, list) {
+ if (!site_mod->sites) {
+ /*
+ * This can happen if the static call key is defined in
+ * a module which doesn't use it.
+ */
+ continue;
+ }
+
+ /* Default bound is the end of the vmlinux table; a module entry overrides it. */
+ stop = __stop_static_call_sites;
+
+#ifdef CONFIG_MODULES
+ if (site_mod->mod) {
+ stop = site_mod->mod->static_call_sites +
+ site_mod->mod->num_static_call_sites;
+ }
+#endif
+
+ /* The table is sorted by key, so this key's sites form one contiguous run. */
+ for (site = site_mod->sites;
+ site < stop && static_call_key(site) == key; site++) {
+ void *site_addr = static_call_addr(site);
+ struct module *mod = site_mod->mod;
+
+ if (static_call_is_init(site)) {
+ /*
+ * Don't write to call sites which were in
+ * initmem and have since been freed.
+ */
+ if (!mod && system_state >= SYSTEM_RUNNING)
+ continue;
+ if (mod && (mod->state == MODULE_STATE_LIVE ||
+ mod->state == MODULE_STATE_GOING))
+ continue;
+ }
+
+ if (!kernel_text_address((unsigned long)site_addr)) {
+ WARN_ONCE(1, "can't patch static call site at %pS",
+ site_addr);
+ continue;
+ }
+
+ arch_static_call_transform(site_addr, key->tramp, func);
+ }
+ }
+
+done:
+ static_call_unlock();
+ cpus_read_unlock();
+}
+EXPORT_SYMBOL_GPL(__static_call_update);
+
+/*
+ * Hand each distinct key's trampoline in the sorted table [start, stop) to
+ * the architecture, together with the key's first call site.
+ *
+ * On arches without PLTs, the trampolines will no longer be used and can be
+ * poisoned.
+ *
+ * Other arches may continue to reuse the trampolines in cases where the
+ * destination function is too far away from the call site.
+ */
+static void static_call_defuse_tramps(struct static_call_site *start,
+				      struct static_call_site *stop)
+{
+	struct static_call_key *last_key = NULL;
+	struct static_call_site *site = start;
+
+	while (site < stop) {
+		struct static_call_key *key = static_call_key(site);
+
+		/* Only the first site of each run of equal keys triggers a defuse. */
+		if (key != last_key) {
+			arch_static_call_defuse_tramp(static_call_addr(site),
+						      key->tramp);
+			last_key = key;
+		}
+
+		site++;
+	}
+}
+
+#ifdef CONFIG_MODULES
+
+/*
+ * Register and patch the call sites of a module being loaded.
+ *
+ * The module's table is sorted by key, sites inside the module's init region
+ * are flagged, one static_call_mod is queued on each distinct key, and every
+ * site is patched to the key's current destination.
+ *
+ * Returns 0 on success or -ENOMEM; on failure the entries already queued are
+ * left in place for the caller to remove.
+ */
+static int static_call_add_module(struct module *mod)
+{
+	struct static_call_site *start = mod->static_call_sites;
+	struct static_call_site *stop = start + mod->num_static_call_sites;
+	struct static_call_key *last_key = NULL;
+	struct static_call_site *site;
+
+	if (start == stop)
+		return 0;
+
+	static_call_sort_entries(start, stop);
+
+	for (site = start; site < stop; site++) {
+		void *site_addr = static_call_addr(site);
+		struct static_call_key *key;
+
+		if (within_module_init((unsigned long)site_addr, mod))
+			static_call_set_init(site);
+
+		key = static_call_key(site);
+		if (key != last_key) {
+			struct static_call_mod *site_mod;
+
+			last_key = key;
+
+			/* First site of a new key: record where its run starts. */
+			site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+			if (!site_mod)
+				return -ENOMEM;
+
+			site_mod->mod = mod;
+			site_mod->sites = site;
+			list_add_tail(&site_mod->list, &key->site_mods);
+		}
+
+		arch_static_call_transform(site_addr, key->tramp, key->func);
+	}
+
+	/*
+	 * If a tramp is used across modules, it may be defused more than once.
+	 * This should be idempotent.
+	 */
+	static_call_defuse_tramps(start, stop);
+
+	return 0;
+}
+
+/*
+ * Drop @mod's static_call_mod entry from every key its call sites refer to.
+ * Each distinct key in the module's table is visited once.
+ */
+static void static_call_del_module(struct module *mod)
+{
+	struct static_call_site *stop = mod->static_call_sites +
+					mod->num_static_call_sites;
+	struct static_call_key *last_key = NULL;
+	struct static_call_site *site;
+
+	for (site = mod->static_call_sites; site < stop; site++) {
+		struct static_call_key *key = static_call_key(site);
+		struct static_call_mod *site_mod;
+
+		if (key == last_key)
+			continue;
+		last_key = key;
+
+		list_for_each_entry(site_mod, &key->site_mods, list) {
+			if (site_mod->mod != mod)
+				continue;
+
+			/* Leaving the loop right after the removal keeps the walk safe. */
+			list_del(&site_mod->list);
+			kfree(site_mod);
+			break;
+		}
+	}
+}
+
+/*
+ * Module notifier callback: register the call sites of a module that is
+ * coming, unregister those of a module that is going. Other states are
+ * ignored. Runs under cpus_read_lock() and static_call_mutex.
+ *
+ * Returns the result of static_call_add_module() converted with
+ * notifier_from_errno().
+ */
+static int static_call_module_notify(struct notifier_block *nb,
+				     unsigned long val, void *data)
+{
+	struct module *mod = data;
+	int ret = 0;
+
+	cpus_read_lock();
+	static_call_lock();
+
+	if (val == MODULE_STATE_COMING) {
+		/* RO protection is lifted around the patching of the module's sites. */
+		module_disable_ro(mod);
+		ret = static_call_add_module(mod);
+		module_enable_ro(mod, false);
+
+		if (ret) {
+			WARN(1, "Failed to allocate memory for static calls");
+			static_call_del_module(mod);
+		}
+	} else if (val == MODULE_STATE_GOING) {
+		static_call_del_module(mod);
+	}
+
+	static_call_unlock();
+	cpus_read_unlock();
+
+	return notifier_from_errno(ret);
+}
+
+/* Notifier block routing module state changes to static_call_module_notify(). */
+static struct notifier_block static_call_module_nb = {
+ .notifier_call = static_call_module_notify,
+};
+
+#endif /* CONFIG_MODULES */
+
+/*
+ * Boot-time setup of the vmlinux call sites.
+ *
+ * Sorts the vmlinux call-site table by key, flags sites located in init
+ * sections, queues one static_call_mod (with mod == NULL) on each key at its
+ * first site, patches every site to its key's current destination and
+ * finally defuses the trampolines. On success static_call_initialized is
+ * set and, with CONFIG_MODULES, the module notifier is registered.
+ *
+ * Registered through early_initcall(), whose callbacks have the type
+ * int (*)(void), so a status is returned: 0 on success (an empty table only
+ * warns), -ENOMEM if a static_call_mod could not be allocated.
+ */
+static int __init static_call_init(void)
+{
+	struct static_call_site *start = __start_static_call_sites;
+	struct static_call_site *stop = __stop_static_call_sites;
+	struct static_call_site *site;
+	int ret = 0;
+
+	if (start == stop) {
+		pr_warn("WARNING: empty static call table\n");
+		return 0;
+	}
+
+	cpus_read_lock();
+	static_call_lock();
+
+	static_call_sort_entries(start, stop);
+
+	for (site = start; site < stop; site++) {
+		struct static_call_key *key = static_call_key(site);
+		void *site_addr = static_call_addr(site);
+
+		if (init_section_contains(site_addr, 1))
+			static_call_set_init(site);
+
+		/* An empty list means this is the key's first site in the sorted table. */
+		if (list_empty(&key->site_mods)) {
+			struct static_call_mod *site_mod;
+
+			site_mod = kzalloc(sizeof(*site_mod), GFP_KERNEL);
+			if (!site_mod) {
+				WARN(1, "Failed to allocate memory for static calls");
+				ret = -ENOMEM;
+				goto done;
+			}
+
+			site_mod->sites = site;
+			list_add_tail(&site_mod->list, &key->site_mods);
+		}
+
+		arch_static_call_transform(site_addr, key->tramp, key->func);
+	}
+
+	static_call_defuse_tramps(start, stop);
+
+	static_call_initialized = true;
+
+done:
+	static_call_unlock();
+	cpus_read_unlock();
+
+#ifdef CONFIG_MODULES
+	/* Registered outside the locks, and only once the core is initialized. */
+	if (static_call_initialized)
+		register_module_notifier(&static_call_module_nb);
+#endif
+
+	return ret;
+}
+early_initcall(static_call_init);
--
2.17.2