[RFC PATCH] x86/syscalls: allow tracing of __do_sys_[syscall] functions

From: Nadav Amit
Date: Tue Sep 13 2022 - 17:27:13 EST


From: Nadav Amit <namit@xxxxxxxxxx>

Tracing - through ftrace function tracer and kprobes - of certain common
syscall functions is currently disabled. Setting kprobes on these
functions is specifically useful for debugging of syscall failures.

Such tracing is disabled since __do_sys_[syscall] functions are declared
as "inline". "inline" in the kernel is actually defined as a macro that
in addition to using the inline keyword also disables tracing (notrace).
According to the comments in the code, tracing inline functions can
wreak havoc, which is probably true in some cases.

In practice, however, this might be too broad. The compiler regards
the "inline" keyword only as a hint, which it is free to ignore. In
fact, in my builds gcc ignores the "inline" hint for many
__do_sys_[syscall] functions, since some of them are quite big and
called from multiple locations (for compat). As a result, these
functions cannot be traced.

There are 4 possible solutions for enabling the tracing of
__do_sys_[syscall]:

1. Mark __do_sys_[syscall] as __always_inline instead of inline. This
would increase the executable size, which might not be desired.

2. Remove the inline hint from __do_sys_[syscall]. Again, it might
affect the generated code, inducing function call overhead for some
syscalls.

3. Remove "notrace" from the "inline" macro definition, and require
functions that cannot be traced to be marked explicitly as "notrace".
This might be the most correct solution, which would also enable tracing
of additional useful functions. But finding the functions that cannot
be traced is not easy without some automation.

4. Avoid the use of "notrace" specifically for __do_sys_[syscall].

Use the last approach to enable the tracing of __do_sys_[syscall]
functions. Introduce an "inline_trace" macro that sets the "__inline"
keyword without "notrace". Use it for the syscall wrappers.

This enables the tracing of 54 useful functions on my build, for
instance, __do_sys_vmsplice(), __do_sys_mremap() and
__do_sys_process_madvise().

Cc: "Peter Zijlstra (Intel)" <peterz@xxxxxxxxxxxxx>
Cc: "Steven Rostedt (Google)" <rostedt@xxxxxxxxxxx>
Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>
---
arch/x86/include/asm/syscall_wrapper.h | 8 ++++----
include/linux/compat.h | 4 ++--
include/linux/compiler_types.h | 6 +++++-
3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 59358d1bf880..2673e3551aad 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -201,14 +201,14 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);

#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
- static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+ static inline_trace long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
__IA32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \
__X32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \
static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\
} \
- static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+ static inline_trace long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))

/*
* As some compat syscalls may not be implemented, we need to expand
@@ -227,7 +227,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);

#define __SYSCALL_DEFINEx(x, name, ...) \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
- static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+ static inline_trace long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
__X64_SYS_STUBx(x, name, __VA_ARGS__) \
__IA32_SYS_STUBx(x, name, __VA_ARGS__) \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
@@ -237,7 +237,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
return ret; \
} \
- static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+ static inline_trace long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))

/*
* As the generic SYSCALL_DEFINE0() macro does not decode any parameters for
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 594357881b0b..4d786581219b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -75,7 +75,7 @@
asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_compat_sys##name)))); \
ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \
- static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+ static inline_trace long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
@@ -84,7 +84,7 @@
return ret; \
} \
__diag_pop(); \
- static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+ static inline_trace long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
#endif /* COMPAT_SYSCALL_DEFINEx */

struct compat_iovec {
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 4f2a819fd60a..d88bfcf387ea 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -152,8 +152,12 @@ struct ftrace_likely_data {
* externally visible function. This makes extern inline behave as per gnu89
* semantics rather than c99. This prevents multiple symbol definition errors
* of extern inline functions at link time.
- * A lot of inline functions can cause havoc with function tracing.
+ *
+ * A lot of inline functions can cause havoc with function tracing. If the
+ * function is known to be safe for tracing, inline_trace can be used. Otherwise
+ * inline would prevent tracing.
*/
+#define inline_trace __inline __gnu_inline __inline_maybe_unused
#define inline inline __gnu_inline __inline_maybe_unused notrace

/*
--
2.25.1