[PATCH v2 4/4] x86/static_call: Add inline static call implementation for x86-64

From: Josh Poimboeuf
Date: Mon Nov 26 2018 - 08:55:21 EST


Add the inline static call implementation for x86-64. For each key, a
temporary trampoline is created, named ____static_call_tramp_<key>. The
trampoline has an indirect jump to the destination function.

Objtool uses the trampoline naming convention to detect all the call
sites. It then annotates those call sites in the .static_call_sites
section.

During boot (and module init), the call sites are patched to call
directly into the destination function. The temporary trampoline is
then no longer used.

Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
arch/x86/Kconfig | 5 +-
arch/x86/include/asm/static_call.h | 28 +++-
arch/x86/kernel/asm-offsets.c | 6 +
arch/x86/kernel/static_call.c | 30 ++++-
include/linux/static_call.h | 2 +-
tools/objtool/Makefile | 3 +-
tools/objtool/check.c | 126 +++++++++++++++++-
tools/objtool/check.h | 2 +
tools/objtool/elf.h | 1 +
.../objtool/include/linux/static_call_types.h | 19 +++
tools/objtool/sync-check.sh | 1 +
11 files changed, 213 insertions(+), 10 deletions(-)
create mode 100644 tools/objtool/include/linux/static_call_types.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a2a10e0ce248..e099ea87ea70 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -189,7 +189,8 @@ config X86
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
select HAVE_STACK_VALIDATION if X86_64
- select HAVE_STATIC_CALL_OUTLINE
+ select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION
+ select HAVE_STATIC_CALL_OUTLINE if !HAVE_STACK_VALIDATION
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
@@ -203,6 +204,7 @@ config X86
select RTC_MC146818_LIB
select SPARSE_IRQ
select SRCU
+ select STACK_VALIDATION if HAVE_STACK_VALIDATION && (HAVE_STATIC_CALL_INLINE || RETPOLINE)
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
select USER_STACKTRACE_SUPPORT
@@ -438,7 +440,6 @@ config GOLDFISH
config RETPOLINE
bool "Avoid speculative indirect branches in kernel"
default y
- select STACK_VALIDATION if HAVE_STACK_VALIDATION
help
Compile kernel with the retpoline compiler options to guard against
kernel-to-user data leaks by avoiding speculative indirect
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 6e9ad5969ec2..27bd7da16150 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -2,6 +2,20 @@
#ifndef _ASM_STATIC_CALL_H
#define _ASM_STATIC_CALL_H

+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+
+/*
+ * This trampoline is only used during boot / module init, so it's safe to use
+ * the indirect branch without a retpoline.
+ */
+#define __ARCH_STATIC_CALL_TRAMP_JMP(key, func) \
+ ANNOTATE_RETPOLINE_SAFE \
+ "jmpq *" __stringify(key) "+" __stringify(SC_KEY_func) "(%rip) \n"
+
+#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
+
/*
* Manually construct a 5-byte direct JMP to prevent the assembler from
* optimizing it into a 2-byte JMP.
@@ -12,9 +26,19 @@
".long " #func " - " __ARCH_STATIC_CALL_JMP_LABEL(key) "\n" \
__ARCH_STATIC_CALL_JMP_LABEL(key) ":"

+#endif /* !CONFIG_HAVE_STATIC_CALL_INLINE */
+
/*
- * This is a permanent trampoline which does a direct jump to the function.
- * The direct jump get patched by static_call_update().
+ * For CONFIG_HAVE_STATIC_CALL_INLINE, this is a temporary trampoline which
+ * uses the current value of the key->func pointer to do an indirect jump to
+ * the function. This trampoline is only used during boot / module init,
+ * before the call sites get patched by static_call_update(). The name of this
+ * trampoline has a magical aspect: objtool uses it to find static call sites
+ * so it can create the .static_call_sites section.
+ *
+ * For CONFIG_HAVE_STATIC_CALL_OUTLINE, this is a permanent trampoline which
+ * does a direct jump to the function. The direct jump gets patched by
+ * static_call_update().
*/
#define ARCH_DEFINE_STATIC_CALL_TRAMP(key, func) \
asm(".pushsection .text, \"ax\" \n" \
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 72adf6c335dc..da8fd220e4f2 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
#include <linux/hardirq.h>
#include <linux/suspend.h>
#include <linux/kbuild.h>
+#include <linux/static_call.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/sigframe.h>
@@ -104,4 +105,9 @@ void common(void) {
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
+
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+ BLANK();
+ OFFSET(SC_KEY_func, static_call_key, func);
+#endif
}
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 8026d176f25c..d3869295b88d 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -9,13 +9,21 @@

void static_call_bp_handler(void);
void *bp_handler_dest;
+void *bp_handler_continue;

asm(".pushsection .text, \"ax\" \n"
".globl static_call_bp_handler \n"
".type static_call_bp_handler, @function \n"
"static_call_bp_handler: \n"
- "ANNOTATE_RETPOLINE_SAFE \n"
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+ ANNOTATE_RETPOLINE_SAFE
+ "call *bp_handler_dest \n"
+ ANNOTATE_RETPOLINE_SAFE
+ "jmp *bp_handler_continue \n"
+#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
+ ANNOTATE_RETPOLINE_SAFE
"jmp *bp_handler_dest \n"
+#endif
".popsection \n");

void arch_static_call_transform(void *site, void *tramp, void *func)
@@ -25,7 +33,10 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
unsigned char insn_opcode;
unsigned char opcodes[CALL_INSN_SIZE];

- insn = (unsigned long)tramp;
+ if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE))
+ insn = (unsigned long)site;
+ else
+ insn = (unsigned long)tramp;

mutex_lock(&text_mutex);

@@ -41,8 +52,10 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
opcodes[0] = insn_opcode;
memcpy(&opcodes[1], &dest_relative, CALL_INSN_SIZE - 1);

- /* Set up the variable for the breakpoint handler: */
+ /* Set up the variables for the breakpoint handler: */
bp_handler_dest = func;
+ if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE))
+ bp_handler_continue = (void *)(insn + CALL_INSN_SIZE);

/* Patch the call site: */
text_poke_bp((void *)insn, opcodes, CALL_INSN_SIZE,
@@ -52,3 +65,14 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
mutex_unlock(&text_mutex);
}
EXPORT_SYMBOL_GPL(arch_static_call_transform);
+
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+void arch_static_call_defuse_tramp(void *site, void *tramp)
+{
+ unsigned short opcode = INSN_UD2;
+
+ mutex_lock(&text_mutex);
+ text_poke((void *)tramp, &opcode, 2);
+ mutex_unlock(&text_mutex);
+}
+#endif
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 651f4d784377..6daff586c97d 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -70,7 +70,7 @@
#include <linux/cpu.h>
#include <linux/static_call_types.h>

-#ifdef CONFIG_HAVE_STATIC_CALL
+#if defined(CONFIG_HAVE_STATIC_CALL) && !defined(COMPILE_OFFSETS)
#include <asm/static_call.h>
extern void arch_static_call_transform(void *site, void *tramp, void *func);
#endif
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index c9d038f91af6..fb1afa34f10d 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -29,7 +29,8 @@ all: $(OBJTOOL)

INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
- -I$(srctree)/tools/objtool/arch/$(ARCH)/include
+ -I$(srctree)/tools/objtool/arch/$(ARCH)/include \
+ -I$(srctree)/tools/objtool/include
WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
CFLAGS += -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES)
LDFLAGS += -lelf $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0414a0d52262..ea1ff9ea2d78 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -27,6 +27,7 @@

#include <linux/hashtable.h>
#include <linux/kernel.h>
+#include <linux/static_call_types.h>

struct alternative {
struct list_head list;
@@ -165,6 +166,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
"fortify_panic",
"usercopy_abort",
"machine_real_restart",
+ "rewind_stack_do_exit",
};

if (func->bind == STB_WEAK)
@@ -525,6 +527,10 @@ static int add_jump_destinations(struct objtool_file *file)
} else {
/* sibling call */
insn->jump_dest = 0;
+ if (rela->sym->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
continue;
}

@@ -1202,6 +1208,24 @@ static int read_retpoline_hints(struct objtool_file *file)
return 0;
}

+static int read_static_call_tramps(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->bind == STB_GLOBAL &&
+ !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+ strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
+ func->static_call_tramp = true;
+ }
+
+ }
+
+ return 0;
+}
+
static void mark_rodata(struct objtool_file *file)
{
struct section *sec;
@@ -1267,6 +1291,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;

+ ret = read_static_call_tramps(file);
+ if (ret)
+ return ret;
+
return 0;
}

@@ -1920,6 +1948,11 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
if (is_fentry_call(insn))
break;

+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
+
ret = dead_end_function(file, insn->call_dest);
if (ret == 1)
return 0;
@@ -2167,6 +2200,89 @@ static int validate_reachable_instructions(struct objtool_file *file)
return 0;
}

+static int create_static_call_sections(struct objtool_file *file)
+{
+ struct section *sec, *rela_sec;
+ struct rela *rela;
+ struct static_call_site *site;
+ struct instruction *insn;
+ char *key_name;
+ struct symbol *key_sym;
+ int idx;
+
+ sec = find_section_by_name(file->elf, ".static_call_sites");
+ if (sec) {
+ WARN("file already has .static_call_sites section, skipping");
+ return 0;
+ }
+
+ if (list_empty(&file->static_call_list))
+ return 0;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node)
+ idx++;
+
+ sec = elf_create_section(file->elf, ".static_call_sites",
+ sizeof(struct static_call_site), idx);
+ if (!sec)
+ return -1;
+
+ rela_sec = elf_create_rela_section(file->elf, sec);
+ if (!rela_sec)
+ return -1;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+
+ site = (struct static_call_site *)sec->data->d_buf + idx;
+ memset(site, 0, sizeof(struct static_call_site));
+
+ /* populate rela for 'addr' */
+ rela = malloc(sizeof(*rela));
+ if (!rela) {
+ perror("malloc");
+ return -1;
+ }
+ memset(rela, 0, sizeof(*rela));
+ rela->sym = insn->sec->sym;
+ rela->addend = insn->offset;
+ rela->type = R_X86_64_PC32;
+ rela->offset = idx * sizeof(struct static_call_site);
+ list_add_tail(&rela->list, &rela_sec->rela_list);
+ hash_add(rela_sec->rela_hash, &rela->hash, rela->offset);
+
+ /* find key symbol */
+ key_name = insn->call_dest->name + strlen(STATIC_CALL_TRAMP_PREFIX_STR);
+ key_sym = find_symbol_by_name(file->elf, key_name);
+ if (!key_sym) {
+ WARN("can't find static call key symbol: %s", key_name);
+ return -1;
+ }
+
+ /* populate rela for 'key' */
+ rela = malloc(sizeof(*rela));
+ if (!rela) {
+ perror("malloc");
+ return -1;
+ }
+ memset(rela, 0, sizeof(*rela));
+ rela->sym = key_sym;
+ rela->addend = 0;
+ rela->type = R_X86_64_PC32;
+ rela->offset = idx * sizeof(struct static_call_site) + 4;
+ list_add_tail(&rela->list, &rela_sec->rela_list);
+ hash_add(rela_sec->rela_hash, &rela->hash, rela->offset);
+
+ idx++;
+ }
+
+ if (elf_rebuild_rela_section(rela_sec))
+ return -1;
+
+ return 0;
+}
+
static void cleanup(struct objtool_file *file)
{
struct instruction *insn, *tmpinsn;
@@ -2191,12 +2307,13 @@ int check(const char *_objname, bool orc)

objname = _objname;

- file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
+ file.elf = elf_open(objname, O_RDWR);
if (!file.elf)
return 1;

INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
+ INIT_LIST_HEAD(&file.static_call_list);
file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
file.c_file = find_section_by_name(file.elf, ".comment");
file.ignore_unreachables = no_unreachable;
@@ -2236,6 +2353,11 @@ int check(const char *_objname, bool orc)
warnings += ret;
}

+ ret = create_static_call_sections(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
if (orc) {
ret = create_orc(&file);
if (ret < 0)
@@ -2244,7 +2366,9 @@ int check(const char *_objname, bool orc)
ret = create_orc_sections(&file);
if (ret < 0)
goto out;
+ }

+ if (orc || !list_empty(&file.static_call_list)) {
ret = elf_write(file.elf);
if (ret < 0)
goto out;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index e6e8a655b556..56b8b7fb1bd1 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -39,6 +39,7 @@ struct insn_state {
struct instruction {
struct list_head list;
struct hlist_node hash;
+ struct list_head static_call_node;
struct section *sec;
unsigned long offset;
unsigned int len;
@@ -60,6 +61,7 @@ struct objtool_file {
struct elf *elf;
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 16);
+ struct list_head static_call_list;
struct section *whitelist;
bool ignore_unreachables, c_file, hints, rodata;
};
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index bc97ed86b9cd..3cf44d7cc3ac 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -62,6 +62,7 @@ struct symbol {
unsigned long offset;
unsigned int len;
struct symbol *pfunc, *cfunc;
+ bool static_call_tramp;
};

struct rela {
diff --git a/tools/objtool/include/linux/static_call_types.h b/tools/objtool/include/linux/static_call_types.h
new file mode 100644
index 000000000000..6859b208de6e
--- /dev/null
+++ b/tools/objtool/include/linux/static_call_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _STATIC_CALL_TYPES_H
+#define _STATIC_CALL_TYPES_H
+
+#include <linux/stringify.h>
+
+#define STATIC_CALL_TRAMP_PREFIX ____static_call_tramp_
+#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX)
+
+#define STATIC_CALL_TRAMP(key) __PASTE(STATIC_CALL_TRAMP_PREFIX, key)
+#define STATIC_CALL_TRAMP_STR(key) __stringify(STATIC_CALL_TRAMP(key))
+
+/* The static call site table is created by objtool. */
+struct static_call_site {
+ s32 addr;
+ s32 key;
+};
+
+#endif /* _STATIC_CALL_TYPES_H */
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 1470e74e9d66..e1a204bf3556 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -10,6 +10,7 @@ arch/x86/include/asm/insn.h
arch/x86/include/asm/inat.h
arch/x86/include/asm/inat_types.h
arch/x86/include/asm/orc_types.h
+include/linux/static_call_types.h
'

check()
--
2.17.2