[PATCH] perf trace: Skip internal syscall arguments

From: Namhyung Kim

Date: Wed Nov 26 2025 - 23:44:19 EST


Recent changes in the linux-next kernel add a new field to the syscall
tracepoints that carries the contents of userspace memory, as shown below.

# cat /sys/kernel/tracing/events/syscalls/sys_enter_write/format
name: sys_enter_write
ID: 758
format:
field:unsigned short common_type; offset:0; size:2; signed:0;
field:unsigned char common_flags; offset:2; size:1; signed:0;
field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
field:int common_pid; offset:4; size:4; signed:1;

field:int __syscall_nr; offset:8; size:4; signed:1;
field:unsigned int fd; offset:16; size:8; signed:0;
field:const char * buf; offset:24; size:8; signed:0;
field:size_t count; offset:32; size:8; signed:0;
field:__data_loc char[] __buf_val; offset:40; size:4; signed:0;

print fmt: "fd: 0x%08lx, buf: 0x%08lx (%s), count: 0x%08lx", ((unsigned long)(REC->fd)),
((unsigned long)(REC->buf)), __print_dynamic_array(__buf_val, 1),
((unsigned long)(REC->count))

perf trace has its own way of handling those arguments, and this change
confuses it, making some tests fail. Fix it by skipping the new fields
that have the "__data_loc char[]" type.

Maybe we can switch to this instead of the BPF augmentation later.

Reported-by: Thomas Richter <tmricht@xxxxxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Howard Chu <howardchu95@xxxxxxxxx>
Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
tools/perf/builtin-trace.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a743bda294bd3400..baee1f6956001d86 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2069,6 +2069,15 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n
return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
}

+/*
+ * Return true if the field type is exactly "__data_loc char[]". v6.19 kernels
+ * added such fields to read userspace memory; perf ignores them as syscall args.
+ */
+static bool is_internal_field(struct tep_format_field *field)
+{
+ return !strcmp(field->type, "__data_loc char[]");
+}
+
static struct tep_format_field *
syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
bool *use_btf)
@@ -2077,6 +2086,10 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
int len;

for (; field; field = field->next, ++arg) {
+ /* assume it's the last argument */
+ if (is_internal_field(field))
+ continue;
+
last_field = field;

if (arg->scnprintf)
@@ -2145,6 +2158,7 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
{
char tp_name[128];
const char *name;
+ struct tep_format_field *field;
int err;

if (sc->nonexistent)
@@ -2201,6 +2215,13 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
--sc->nr_args;
}

+ field = sc->args;
+ while (field) {
+ if (is_internal_field(field))
+ --sc->nr_args;
+ field = field->next;
+ }
+
sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");

--
2.52.0.487.g5c8c507ade-goog