[PATCH] tracing: rewrite trace_save_cmdline()

From: Lai Jiangshan
Date: Mon Jun 22 2009 - 03:08:14 EST


The attachment for this mail is a optimized patch when
SAVED_CMDLINE_COLLISION_WINDOW = 1. It has the best probe time(zero),
but it has a worst replacement-when-collision behavior.

I'm not surprise if you guys like the one in attachment:
It's not the end of the world if we do a bad replacement sometimes.

-------------------

Subject: [PATCH] tracing: rewrite trace_save_cmdline()

I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
wastes too much memory, so I remove it.

The old FIFO algorithm is replaced with a new one:
Open address hash table with double hash + tick-LRU.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
---
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 076fa6f..a0b163f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -36,6 +36,7 @@
#include <linux/poll.h>
#include <linux/gfp.h>
#include <linux/fs.h>
+#include <linux/hash.h>

#include "trace.h"
#include "trace_output.h"
@@ -649,24 +650,19 @@ void tracing_reset_current_online_cpus(void)
tracing_reset_online_cpus(&global_trace);
}

-#define SAVED_CMDLINES 128
-#define NO_CMDLINE_MAP UINT_MAX
-static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+#define SAVED_CMDLINE_SHIFT 8
+#define SAVED_CMDLINE_COLLISION_WINDOW 4 /* 4 is enough! */
+#define SAVED_CMDLINES (1 << SAVED_CMDLINE_SHIFT)
+#define SAVED_CMDLINE_IDX(hash) ((hash) & (SAVED_CMDLINES - 1))
+static unsigned map_cmdline_to_tick[SAVED_CMDLINES];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
-static int cmdline_idx;
+static u32 cmdlines_tick;
static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;

/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;

-static void trace_init_cmdlines(void)
-{
- memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
- memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
- cmdline_idx = 0;
-}
-
static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);

@@ -753,13 +749,28 @@ void tracing_stop(void)

void trace_stop_cmdline_recording(void);

+/* Is @x in the range [@a, @b] */
+static inline int in_range(u32 a, u32 b, u32 x)
+{
+ /*
+ * let dist1 = x - a, dist2 = b - x, then
+ * @x in the range [@a, @b] iff (dist1 + dist2) is not overflow
+ * (dist1 + dist2) is not overflow iff dist1 <= ~dist2
+ */
+ return (x - a) <= ~(b - x);
+}
+
static void trace_save_cmdline(struct task_struct *tsk)
{
- unsigned pid, idx;
+ unsigned int hash, hash2, idx, map, i;
+ u32 tick, min_tick;

- if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ if (!tsk->pid)
return;

+ hash = map = hash_32((u32)tsk->pid, SAVED_CMDLINE_SHIFT);
+ hash2 = (tsk->pid << 1) | 1; /* odd */
+
/*
* It's not the end of the world if we don't get
* the lock, but we also don't want to spin
@@ -769,52 +780,73 @@ static void trace_save_cmdline(struct task_struct *tsk)
if (!__raw_spin_trylock(&trace_cmdline_lock))
return;

- idx = map_pid_to_cmdline[tsk->pid];
- if (idx == NO_CMDLINE_MAP) {
- idx = (cmdline_idx + 1) % SAVED_CMDLINES;
+ min_tick = map_cmdline_to_tick[map];

- /*
- * Check whether the cmdline buffer at idx has a pid
- * mapped. We are going to overwrite that entry so we
- * need to clear the map_pid_to_cmdline. Otherwise we
- * would read the new comm for the old pid.
- */
- pid = map_cmdline_to_pid[idx];
- if (pid != NO_CMDLINE_MAP)
- map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
+ /* apply tick-LRU algorithm on collision window */
+ for (i = 0; i < SAVED_CMDLINE_COLLISION_WINDOW; i++) {
+ idx = SAVED_CMDLINE_IDX(hash);
+
+ /* Is map_cmdline_to_pid[idx] unused? */
+ if (!map_cmdline_to_pid[idx]) {
+ map = idx;
+ break;
+ }

- map_cmdline_to_pid[idx] = tsk->pid;
- map_pid_to_cmdline[tsk->pid] = idx;
+ /* Has tsk->pid been saved at map_cmdline_to_pid[idx]? */
+ if (map_cmdline_to_pid[idx] == tsk->pid) {
+ map = idx;
+ break;
+ }

- cmdline_idx = idx;
+ /* Is @tick less than @min_tick? */
+ tick = map_cmdline_to_tick[idx];
+ if (!in_range(min_tick, cmdlines_tick, tick)) {
+ min_tick = tick;
+ map = idx;
+ }
+
+ hash += hash2;
}

- memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+ cmdlines_tick++;
+ map_cmdline_to_tick[map] = cmdlines_tick;
+ map_cmdline_to_pid[map] = tsk->pid;
+ memcpy(saved_cmdlines[map], tsk->comm, TASK_COMM_LEN);

__raw_spin_unlock(&trace_cmdline_lock);
}

void trace_find_cmdline(int pid, char comm[])
{
- unsigned map;
+ unsigned int hash, hash2, i, map;
+ const char *saved_comm = "<...>";

if (!pid) {
strcpy(comm, "<idle>");
return;
}

- if (pid > PID_MAX_DEFAULT) {
- strcpy(comm, "<...>");
- return;
- }
+ hash = hash_32((u32)pid, SAVED_CMDLINE_SHIFT);
+ hash2 = (pid << 1) | 1; /* odd */

preempt_disable();
__raw_spin_lock(&trace_cmdline_lock);
- map = map_pid_to_cmdline[pid];
- if (map != NO_CMDLINE_MAP)
- strcpy(comm, saved_cmdlines[map]);
- else
- strcpy(comm, "<...>");
+
+ for (i = 0; i < SAVED_CMDLINE_COLLISION_WINDOW; i++) {
+ map = SAVED_CMDLINE_IDX(hash);
+
+ if (!map_cmdline_to_pid[map])
+ break;
+
+ if (map_cmdline_to_pid[map] == pid) {
+ saved_comm = saved_cmdlines[map];
+ break;
+ }
+
+ hash += hash2;
+ }
+
+ strcpy(comm, saved_comm);

__raw_spin_unlock(&trace_cmdline_lock);
preempt_enable();
@@ -2470,7 +2502,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
int r;

pid = map_cmdline_to_pid[i];
- if (pid == -1 || pid == NO_CMDLINE_MAP)
+ if (!pid)
continue;

trace_find_cmdline(pid, buf_comm);
@@ -4332,8 +4364,6 @@ __init static int tracer_alloc_buffers(void)
max_tr.data[i] = &per_cpu(max_data, i);
}

- trace_init_cmdlines();
-
register_tracer(&nop_trace);
current_trace = &nop_trace;
#ifdef CONFIG_BOOT_TRACER

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 076fa6f..d583ba5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -36,6 +36,7 @@
#include <linux/poll.h>
#include <linux/gfp.h>
#include <linux/fs.h>
+#include <linux/hash.h>

#include "trace.h"
#include "trace_output.h"
@@ -649,24 +650,15 @@ void tracing_reset_current_online_cpus(void)
tracing_reset_online_cpus(&global_trace);
}

-#define SAVED_CMDLINES 128
-#define NO_CMDLINE_MAP UINT_MAX
-static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+#define SAVED_CMDLINE_SHIFT 8
+#define SAVED_CMDLINES (1 << SAVED_CMDLINE_SHIFT)
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
-static int cmdline_idx;
static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;

/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;

-static void trace_init_cmdlines(void)
-{
- memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
- memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
- cmdline_idx = 0;
-}
-
static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);

@@ -755,11 +747,13 @@ void trace_stop_cmdline_recording(void);

static void trace_save_cmdline(struct task_struct *tsk)
{
- unsigned pid, idx;
+ unsigned idx;

- if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ if (!tsk->pid)
return;

+ idx = hash_32((u32)tsk->pid, SAVED_CMDLINE_SHIFT);
+
/*
* It's not the end of the world if we don't get
* the lock, but we also don't want to spin
@@ -769,27 +763,8 @@ static void trace_save_cmdline(struct task_struct *tsk)
if (!__raw_spin_trylock(&trace_cmdline_lock))
return;

- idx = map_pid_to_cmdline[tsk->pid];
- if (idx == NO_CMDLINE_MAP) {
- idx = (cmdline_idx + 1) % SAVED_CMDLINES;
-
- /*
- * Check whether the cmdline buffer at idx has a pid
- * mapped. We are going to overwrite that entry so we
- * need to clear the map_pid_to_cmdline. Otherwise we
- * would read the new comm for the old pid.
- */
- pid = map_cmdline_to_pid[idx];
- if (pid != NO_CMDLINE_MAP)
- map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
-
- map_cmdline_to_pid[idx] = tsk->pid;
- map_pid_to_cmdline[tsk->pid] = idx;
-
- cmdline_idx = idx;
- }
-
- memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+ map_cmdline_to_pid[idx] = tsk->pid;
+ memcpy(saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);

__raw_spin_unlock(&trace_cmdline_lock);
}
@@ -803,15 +778,17 @@ void trace_find_cmdline(int pid, char comm[])
return;
}

- if (pid > PID_MAX_DEFAULT) {
+ map = hash_32((u32)pid, SAVED_CMDLINE_SHIFT);
+
+ if (map_cmdline_to_pid[map] != pid) {
strcpy(comm, "<...>");
return;
}

preempt_disable();
__raw_spin_lock(&trace_cmdline_lock);
- map = map_pid_to_cmdline[pid];
- if (map != NO_CMDLINE_MAP)
+
+ if (map_cmdline_to_pid[map] == pid)
strcpy(comm, saved_cmdlines[map]);
else
strcpy(comm, "<...>");
@@ -2470,7 +2447,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
int r;

pid = map_cmdline_to_pid[i];
- if (pid == -1 || pid == NO_CMDLINE_MAP)
+ if (!pid)
continue;

trace_find_cmdline(pid, buf_comm);
@@ -4332,8 +4309,6 @@ __init static int tracer_alloc_buffers(void)
max_tr.data[i] = &per_cpu(max_data, i);
}

- trace_init_cmdlines();
-
register_tracer(&nop_trace);
current_trace = &nop_trace;
#ifdef CONFIG_BOOT_TRACER