[PATCH 4.4 063/190] tracing: Map all PIDs to command lines

From: Greg Kroah-Hartman
Date: Thu May 20 2021 - 07:32:20 EST


From: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>

commit 785e3c0a3a870e72dc530856136ab4c8dd207128 upstream.

The default max PID is set by PID_MAX_DEFAULT, and the tracing
infrastructure uses this number to map PIDs to the comm names of the
tasks, so that the output of the trace can show names for the PIDs
recorded in the ring buffer. This mapping is also exported to user space
via the "saved_cmdlines" file in the tracefs directory.

But currently the mapping expects the PIDs to be less than
PID_MAX_DEFAULT, which is the default maximum and not the real maximum.
Recently, systemd started increasing the maximum value of a PID on the
system, and when tasks are traced that have a PID higher than
PID_MAX_DEFAULT, their comm is not recorded. This leads to the entire
trace having "<...>" as the comm name, which is pretty useless.

Instead, keep the mapping array at the size of PID_MAX_DEFAULT, but rather
than using the PID itself as the index to the comm, use the masked PID
(pid & (PID_MAX_DEFAULT - 1)) as the index, and check the full PID against
the map_cmdline_to_pid array (that already exists).

This bug goes back to the beginning of ftrace, but hasn't been an issue
until user space started increasing the maximum value of PIDs.

Link: https://lkml.kernel.org/r/20210427113207.3c601884@xxxxxxxxxxxxxxxxxx

Cc: stable@xxxxxxxxxxxxxxx
Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure")
Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
kernel/trace/trace.c | 41 +++++++++++++++--------------------------
1 file changed, 15 insertions(+), 26 deletions(-)

--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1562,14 +1562,13 @@ void trace_stop_cmdline_recording(void);

static int trace_save_cmdline(struct task_struct *tsk)
{
- unsigned pid, idx;
+ unsigned tpid, idx;

/* treat recording of idle task as a success */
if (!tsk->pid)
return 1;

- if (unlikely(tsk->pid > PID_MAX_DEFAULT))
- return 0;
+ tpid = tsk->pid & (PID_MAX_DEFAULT - 1);

/*
* It's not the end of the world if we don't get
@@ -1580,26 +1579,15 @@ static int trace_save_cmdline(struct tas
if (!arch_spin_trylock(&trace_cmdline_lock))
return 0;

- idx = savedcmd->map_pid_to_cmdline[tsk->pid];
+ idx = savedcmd->map_pid_to_cmdline[tpid];
if (idx == NO_CMDLINE_MAP) {
idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;

- /*
- * Check whether the cmdline buffer at idx has a pid
- * mapped. We are going to overwrite that entry so we
- * need to clear the map_pid_to_cmdline. Otherwise we
- * would read the new comm for the old pid.
- */
- pid = savedcmd->map_cmdline_to_pid[idx];
- if (pid != NO_CMDLINE_MAP)
- savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
-
- savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
- savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
-
+ savedcmd->map_pid_to_cmdline[tpid] = idx;
savedcmd->cmdline_idx = idx;
}

+ savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
set_cmdline(idx, tsk->comm);

arch_spin_unlock(&trace_cmdline_lock);
@@ -1610,6 +1598,7 @@ static int trace_save_cmdline(struct tas
static void __trace_find_cmdline(int pid, char comm[])
{
unsigned map;
+ int tpid;

if (!pid) {
strcpy(comm, "<idle>");
@@ -1621,16 +1610,16 @@ static void __trace_find_cmdline(int pid
return;
}

- if (pid > PID_MAX_DEFAULT) {
- strcpy(comm, "<...>");
- return;
+ tpid = pid & (PID_MAX_DEFAULT - 1);
+ map = savedcmd->map_pid_to_cmdline[tpid];
+ if (map != NO_CMDLINE_MAP) {
+ tpid = savedcmd->map_cmdline_to_pid[map];
+ if (tpid == pid) {
+ strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
+ return;
+ }
}
-
- map = savedcmd->map_pid_to_cmdline[pid];
- if (map != NO_CMDLINE_MAP)
- strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
- else
- strcpy(comm, "<...>");
+ strcpy(comm, "<...>");
}

void trace_find_cmdline(int pid, char comm[])