[PATCH v4 14/18] static_call: Add static_cond_call()

From: Peter Zijlstra
Date: Fri May 01 2020 - 16:31:32 EST


Extend the static_call infrastructure to optimize the following common
pattern:

	if (func_ptr)
		func_ptr(args...)

For the trampoline (which is in effect a tail-call), when the function
pointer is NULL we patch the JMP.d32 into a RET, which then directly
consumes the return address pushed by the call to the trampoline.

For the in-line sites, a NULL function pointer makes us replace the CALL with a NOP5.

NOTE: this is 'obviously' limited to functions with a 'void' return type.

NOTE: DEFINE_STATIC_COND_CALL() only requires a typename, as opposed
to a full function.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
arch/x86/include/asm/static_call.h | 10 ++++++++
arch/x86/kernel/static_call.c | 42 ++++++++++++++++++++++++++++---------
include/linux/static_call.h | 29 +++++++++++++++++++++++++
3 files changed, 71 insertions(+), 10 deletions(-)

--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -30,4 +30,14 @@
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")

+#define ARCH_DEFINE_STATIC_CALL_RETTRAMP(name) \
+ asm(".pushsection .static_call.text, \"ax\" \n" \
+ ".align 4 \n" \
+ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+ STATIC_CALL_TRAMP_STR(name) ": \n" \
+ " ret; nop; nop; nop; nop; \n" \
+ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
+ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".popsection \n")
+
#endif /* _ASM_STATIC_CALL_H */
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -4,19 +4,41 @@
#include <linux/bug.h>
#include <asm/text-patching.h>

-static void __static_call_transform(void *insn, u8 opcode, void *func)
+enum insn_type {
+ call = 0, /* site call */
+ nop = 1, /* site cond-call */
+ jmp = 2, /* tramp / site tail-call */
+ ret = 3, /* tramp / site cond-tail-call */
+};
+
+static void __static_call_transform(void *insn, enum insn_type type, void *func)
{
- const void *code = text_gen_insn(opcode, insn, func);
+ int size = CALL_INSN_SIZE;
+ const void *code;

- if (WARN_ONCE(*(u8 *)insn != opcode,
- "unexpected static call insn opcode 0x%x at %pS\n",
- opcode, insn))
- return;
+ switch (type) {
+ case call:
+ code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
+ break;
+
+ case nop:
+ code = ideal_nops[NOP_ATOMIC5];
+ break;
+
+ case jmp:
+ code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
+ break;
+
+ case ret:
+ code = text_gen_insn(RET_INSN_OPCODE, insn, func);
+ size = RET_INSN_SIZE;
+ break;
+ }

- if (memcmp(insn, code, CALL_INSN_SIZE) == 0)
+ if (memcmp(insn, code, size) == 0)
return;

- text_poke_bp(insn, code, CALL_INSN_SIZE, NULL);
+ text_poke_bp(insn, code, size, NULL);
}

void arch_static_call_transform(void *site, void *tramp, void *func)
@@ -24,10 +46,10 @@ void arch_static_call_transform(void *si
mutex_lock(&text_mutex);

if (tramp)
- __static_call_transform(tramp, JMP32_INSN_OPCODE, func);
+ __static_call_transform(tramp, jmp + !func, func);

if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
- __static_call_transform(site, CALL_INSN_OPCODE, func);
+ __static_call_transform(site, !func, func);

mutex_unlock(&text_mutex);
}
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -16,7 +16,9 @@
*
* DECLARE_STATIC_CALL(name, func);
* DEFINE_STATIC_CALL(name, func);
+ * DEFINE_STATIC_COND_CALL(name, typename);
* static_call(name)(args...);
+ * static_cond_call(name)(args...);
* static_call_update(name, func);
*
* Usage example:
@@ -120,7 +122,16 @@ extern int static_call_text_reserved(voi
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

+#define DEFINE_STATIC_COND_CALL(name, _func) \
+ DECLARE_STATIC_CALL(name, _func); \
+ struct static_call_key STATIC_CALL_KEY(name) = { \
+ .func = NULL, \
+ .type = 1, \
+ }; \
+ ARCH_DEFINE_STATIC_CALL_RETTRAMP(name)
+
#define static_call(name) __static_call(name)
+#define static_cond_call(name) (void)__static_call(name)

#define EXPORT_STATIC_CALL(name) \
EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \
@@ -143,7 +154,15 @@ struct static_call_key {
}; \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

+#define DEFINE_STATIC_COND_CALL(name, _func) \
+ DECLARE_STATIC_CALL(name, _func); \
+ struct static_call_key STATIC_CALL_KEY(name) = { \
+ .func = NULL, \
+ }; \
+ ARCH_DEFINE_STATIC_CALL_RETTRAMP(name)
+
#define static_call(name) __static_call(name)
+#define static_cond_call(name) (void)__static_call(name)

static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
@@ -179,9 +198,19 @@ struct static_call_key {
.func = _func, \
}

+#define DEFINE_STATIC_COND_CALL(name, _func) \
+ DECLARE_STATIC_CALL(name, _func); \
+ struct static_call_key STATIC_CALL_KEY(name) = { \
+ .func = NULL, \
+ }
+
#define static_call(name) \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))

+#define static_cond_call(name) \
+ if (STATIC_CALL_KEY(name).func) \
+ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
+
static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{