[PATCH v4 01/12] tracing/syscalls: remove syscall_nr from syscall metadata

From: Marcin Nowakowski
Date: Fri Oct 14 2016 - 04:35:55 EST


Some architectures map multiple syscall numbers to a single syscall.
This meant that on those platforms, some system calls could not be
properly traced using the syscall event tracing mechanism, as the
syscall number used for registration differed from the one used by
applications.
We can use a syscall lookup together with a traversal of the syscall
metadata table to register for the appropriate events instead. This
slightly increases the overhead during event (un)registration, but
does not impact the trace events themselves, which still use syscall
numbers directly.

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
---
include/linux/syscalls.h | 1 -
include/trace/syscall.h | 2 -
kernel/trace/trace_syscalls.c | 125 ++++++++++++++++++++++++------------------
3 files changed, 72 insertions(+), 56 deletions(-)

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0d7abb8..88324cc 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -160,7 +160,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
static struct syscall_metadata __used \
__syscall_meta_##sname = { \
.name = "sys"#sname, \
- .syscall_nr = -1, /* Filled in at boot */ \
.nb_args = nb, \
.types = nb ? types_##sname : NULL, \
.args = nb ? args_##sname : NULL, \
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 7434f0f..b5fbebe 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -13,7 +13,6 @@
* A syscall entry in the ftrace syscalls array.
*
* @name: name of the syscall
- * @syscall_nr: number of the syscall
* @nb_args: number of parameters it takes
* @types: list of types as strings
* @args: list of args as strings (args[i] matches types[i])
@@ -23,7 +22,6 @@
*/
struct syscall_metadata {
const char *name;
- int syscall_nr;
int nb_args;
const char **types;
const char **args;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5e10395..f50563a 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -405,16 +405,21 @@ static int reg_event_syscall_enter(struct trace_event_file *file,
int ret = 0;
int num;

- num = ((struct syscall_metadata *)call->data)->syscall_nr;
- if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
- return -ENOSYS;
mutex_lock(&syscall_trace_lock);
- if (!tr->sys_refcount_enter)
+ if (!tr->sys_refcount_enter) {
ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
- if (!ret) {
- rcu_assign_pointer(tr->enter_syscall_files[num], file);
- tr->sys_refcount_enter++;
+ if (ret)
+ goto out_unlock;
+ }
+
+ for (num = 0; num < NR_syscalls; num++) {
+ if (syscalls_metadata[num] &&
+ (syscalls_metadata[num] == call->data))
+ rcu_assign_pointer(tr->enter_syscall_files[num], file);
}
+ tr->sys_refcount_enter++;
+
+out_unlock:
mutex_unlock(&syscall_trace_lock);
return ret;
}
@@ -425,12 +430,13 @@ static void unreg_event_syscall_enter(struct trace_event_file *file,
struct trace_array *tr = file->tr;
int num;

- num = ((struct syscall_metadata *)call->data)->syscall_nr;
- if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
- return;
mutex_lock(&syscall_trace_lock);
tr->sys_refcount_enter--;
- RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
+ for (num = 0; num < NR_syscalls; num++) {
+ if (syscalls_metadata[num] &&
+ (syscalls_metadata[num] == call->data))
+ RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
+ }
if (!tr->sys_refcount_enter)
unregister_trace_sys_enter(ftrace_syscall_enter, tr);
mutex_unlock(&syscall_trace_lock);
@@ -443,16 +449,21 @@ static int reg_event_syscall_exit(struct trace_event_file *file,
int ret = 0;
int num;

- num = ((struct syscall_metadata *)call->data)->syscall_nr;
- if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
- return -ENOSYS;
mutex_lock(&syscall_trace_lock);
- if (!tr->sys_refcount_exit)
+ if (!tr->sys_refcount_exit) {
ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
- if (!ret) {
- rcu_assign_pointer(tr->exit_syscall_files[num], file);
- tr->sys_refcount_exit++;
+ if (ret)
+ goto out_unlock;
}
+
+ for (num = 0; num < NR_syscalls; num++) {
+ if (syscalls_metadata[num] &&
+ (syscalls_metadata[num] == call->data))
+ rcu_assign_pointer(tr->exit_syscall_files[num], file);
+ }
+ tr->sys_refcount_exit++;
+
+out_unlock:
mutex_unlock(&syscall_trace_lock);
return ret;
}
@@ -463,12 +474,13 @@ static void unreg_event_syscall_exit(struct trace_event_file *file,
struct trace_array *tr = file->tr;
int num;

- num = ((struct syscall_metadata *)call->data)->syscall_nr;
- if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
- return;
mutex_lock(&syscall_trace_lock);
tr->sys_refcount_exit--;
- RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
+ for (num = 0; num < NR_syscalls; num++) {
+ if (syscalls_metadata[num] &&
+ (syscalls_metadata[num] == call->data))
+ RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
+ }
if (!tr->sys_refcount_exit)
unregister_trace_sys_exit(ftrace_syscall_exit, tr);
mutex_unlock(&syscall_trace_lock);
@@ -477,14 +489,6 @@ static void unreg_event_syscall_exit(struct trace_event_file *file,
static int __init init_syscall_trace(struct trace_event_call *call)
{
int id;
- int num;
-
- num = ((struct syscall_metadata *)call->data)->syscall_nr;
- if (num < 0 || num >= NR_syscalls) {
- pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
- ((struct syscall_metadata *)call->data)->name);
- return -ENOSYS;
- }

if (set_syscall_print_fmt(call) < 0)
return -ENOMEM;
@@ -547,7 +551,6 @@ void __init init_ftrace_syscalls(void)
if (!meta)
continue;

- meta->syscall_nr = i;
syscalls_metadata[i] = meta;
}
}
@@ -604,17 +607,23 @@ static int perf_sysenter_enable(struct trace_event_call *call)
int ret = 0;
int num;

- num = ((struct syscall_metadata *)call->data)->syscall_nr;
-
mutex_lock(&syscall_trace_lock);
- if (!sys_perf_refcount_enter)
+ if (!sys_perf_refcount_enter) {
ret = register_trace_sys_enter(perf_syscall_enter, NULL);
- if (ret) {
- pr_info("event trace: Could not activate syscall entry trace point");
- } else {
- set_bit(num, enabled_perf_enter_syscalls);
- sys_perf_refcount_enter++;
+ if (ret) {
+ pr_info("event trace: Could not activate syscall entry trace point");
+ goto out_unlock;
+ }
+ }
+
+ for (num = 0; num < NR_syscalls; num++) {
+ if (syscalls_metadata[num] &&
+ (syscalls_metadata[num] == call->data))
+ set_bit(num, enabled_perf_enter_syscalls);
}
+ sys_perf_refcount_enter++;
+
+out_unlock:
mutex_unlock(&syscall_trace_lock);
return ret;
}
@@ -623,11 +632,13 @@ static void perf_sysenter_disable(struct trace_event_call *call)
{
int num;

- num = ((struct syscall_metadata *)call->data)->syscall_nr;
-
mutex_lock(&syscall_trace_lock);
sys_perf_refcount_enter--;
- clear_bit(num, enabled_perf_enter_syscalls);
+ for (num = 0; num < NR_syscalls; num++) {
+ if (syscalls_metadata[num] &&
+ (syscalls_metadata[num] == call->data))
+ clear_bit(num, enabled_perf_enter_syscalls);
+ }
if (!sys_perf_refcount_enter)
unregister_trace_sys_enter(perf_syscall_enter, NULL);
mutex_unlock(&syscall_trace_lock);
@@ -675,17 +686,23 @@ static int perf_sysexit_enable(struct trace_event_call *call)
int ret = 0;
int num;

- num = ((struct syscall_metadata *)call->data)->syscall_nr;
-
mutex_lock(&syscall_trace_lock);
- if (!sys_perf_refcount_exit)
+ if (!sys_perf_refcount_exit) {
ret = register_trace_sys_exit(perf_syscall_exit, NULL);
- if (ret) {
- pr_info("event trace: Could not activate syscall exit trace point");
- } else {
- set_bit(num, enabled_perf_exit_syscalls);
- sys_perf_refcount_exit++;
+ if (ret) {
+ pr_info("event trace: Could not activate syscall exit trace point");
+ goto out_unlock;
+ }
+ }
+
+ for (num = 0; num < NR_syscalls; num++) {
+ if (syscalls_metadata[num] &&
+ (syscalls_metadata[num] == call->data))
+ set_bit(num, enabled_perf_exit_syscalls);
}
+ sys_perf_refcount_exit++;
+
+out_unlock:
mutex_unlock(&syscall_trace_lock);
return ret;
}
@@ -694,11 +711,13 @@ static void perf_sysexit_disable(struct trace_event_call *call)
{
int num;

- num = ((struct syscall_metadata *)call->data)->syscall_nr;
-
mutex_lock(&syscall_trace_lock);
sys_perf_refcount_exit--;
- clear_bit(num, enabled_perf_exit_syscalls);
+ for (num = 0; num < NR_syscalls; num++) {
+ if (syscalls_metadata[num] &&
+ (syscalls_metadata[num] == call->data))
+ clear_bit(num, enabled_perf_exit_syscalls);
+ }
if (!sys_perf_refcount_exit)
unregister_trace_sys_exit(perf_syscall_exit, NULL);
mutex_unlock(&syscall_trace_lock);
--
2.7.4