* [PATCH] tracing: rewrite trace_save_cmdline()
@ 2009-06-22 7:09 Lai Jiangshan
2009-06-30 8:47 ` [PATCH] tracing: use hash table to simulate the sparse array Lai Jiangshan
0 siblings, 1 reply; 7+ messages in thread
From: Lai Jiangshan @ 2009-06-22 7:09 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Ingo Molnar, Frederic Weisbecker, LKML
[-- Attachment #1: Type: text/plain, Size: 5882 bytes --]
The attachment for this mail is an optimized patch when
SAVED_CMDLINE_COLLISION_WINDOW = 1. It has the best probe time (zero),
but it has the worst replacement-when-collision behavior.
I wouldn't be surprised if you guys like the one in the attachment:
It's not the end of the world if we do a bad replacement sometimes.
-------------------
Subject: [PATCH] tracing: rewrite trace_save_cmdline()
I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
wastes too much memory, so I remove it.
The old FIFO algorithm is replaced with a new one:
Open address hash table with double hash + tick-LRU.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 076fa6f..a0b163f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -36,6 +36,7 @@
#include <linux/poll.h>
#include <linux/gfp.h>
#include <linux/fs.h>
+#include <linux/hash.h>
#include "trace.h"
#include "trace_output.h"
@@ -649,24 +650,19 @@ void tracing_reset_current_online_cpus(void)
tracing_reset_online_cpus(&global_trace);
}
-#define SAVED_CMDLINES 128
-#define NO_CMDLINE_MAP UINT_MAX
-static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+#define SAVED_CMDLINE_SHIFT 8
+#define SAVED_CMDLINE_COLLISION_WINDOW 4 /* 4 is enough! */
+#define SAVED_CMDLINES (1 << SAVED_CMDLINE_SHIFT)
+#define SAVED_CMDLINE_IDX(hash) ((hash) & (SAVED_CMDLINES - 1))
+static unsigned map_cmdline_to_tick[SAVED_CMDLINES];
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
-static int cmdline_idx;
+static u32 cmdlines_tick;
static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;
-static void trace_init_cmdlines(void)
-{
- memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
- memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
- cmdline_idx = 0;
-}
-
static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);
@@ -753,13 +749,28 @@ void tracing_stop(void)
void trace_stop_cmdline_recording(void);
+/* Is @x in the range [@a, @b] */
+static inline int in_range(u32 a, u32 b, u32 x)
+{
+ /*
+ * let dist1 = x - a, dist2 = b - x, then
+ * @x in the range [@a, @b] iff (dist1 + dist2) is not overflow
+ * (dist1 + dist2) is not overflow iff dist1 <= ~dist2
+ */
+ return (x - a) <= ~(b - x);
+}
+
static void trace_save_cmdline(struct task_struct *tsk)
{
- unsigned pid, idx;
+ unsigned int hash, hash2, idx, map, i;
+ u32 tick, min_tick;
- if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ if (!tsk->pid)
return;
+ hash = map = hash_32((u32)tsk->pid, SAVED_CMDLINE_SHIFT);
+ hash2 = (tsk->pid << 1) | 1; /* odd */
+
/*
* It's not the end of the world if we don't get
* the lock, but we also don't want to spin
@@ -769,52 +780,73 @@ static void trace_save_cmdline(struct task_struct *tsk)
if (!__raw_spin_trylock(&trace_cmdline_lock))
return;
- idx = map_pid_to_cmdline[tsk->pid];
- if (idx == NO_CMDLINE_MAP) {
- idx = (cmdline_idx + 1) % SAVED_CMDLINES;
+ min_tick = map_cmdline_to_tick[map];
- /*
- * Check whether the cmdline buffer at idx has a pid
- * mapped. We are going to overwrite that entry so we
- * need to clear the map_pid_to_cmdline. Otherwise we
- * would read the new comm for the old pid.
- */
- pid = map_cmdline_to_pid[idx];
- if (pid != NO_CMDLINE_MAP)
- map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
+ /* apply tick-LRU algorithm on collision window */
+ for (i = 0; i < SAVED_CMDLINE_COLLISION_WINDOW; i++) {
+ idx = SAVED_CMDLINE_IDX(hash);
+
+ /* Is map_cmdline_to_pid[idx] unused? */
+ if (!map_cmdline_to_pid[idx]) {
+ map = idx;
+ break;
+ }
- map_cmdline_to_pid[idx] = tsk->pid;
- map_pid_to_cmdline[tsk->pid] = idx;
+ /* Has tsk->pid been saved at map_cmdline_to_pid[idx]? */
+ if (map_cmdline_to_pid[idx] == tsk->pid) {
+ map = idx;
+ break;
+ }
- cmdline_idx = idx;
+ /* Is @tick less than @min_tick? */
+ tick = map_cmdline_to_tick[idx];
+ if (!in_range(min_tick, cmdlines_tick, tick)) {
+ min_tick = tick;
+ map = idx;
+ }
+
+ hash += hash2;
}
- memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+ cmdlines_tick++;
+ map_cmdline_to_tick[map] = cmdlines_tick;
+ map_cmdline_to_pid[map] = tsk->pid;
+ memcpy(saved_cmdlines[map], tsk->comm, TASK_COMM_LEN);
__raw_spin_unlock(&trace_cmdline_lock);
}
void trace_find_cmdline(int pid, char comm[])
{
- unsigned map;
+ unsigned int hash, hash2, i, map;
+ const char *saved_comm = "<...>";
if (!pid) {
strcpy(comm, "<idle>");
return;
}
- if (pid > PID_MAX_DEFAULT) {
- strcpy(comm, "<...>");
- return;
- }
+ hash = hash_32((u32)pid, SAVED_CMDLINE_SHIFT);
+ hash2 = (pid << 1) | 1; /* odd */
preempt_disable();
__raw_spin_lock(&trace_cmdline_lock);
- map = map_pid_to_cmdline[pid];
- if (map != NO_CMDLINE_MAP)
- strcpy(comm, saved_cmdlines[map]);
- else
- strcpy(comm, "<...>");
+
+ for (i = 0; i < SAVED_CMDLINE_COLLISION_WINDOW; i++) {
+ map = SAVED_CMDLINE_IDX(hash);
+
+ if (!map_cmdline_to_pid[map])
+ break;
+
+ if (map_cmdline_to_pid[map] == pid) {
+ saved_comm = saved_cmdlines[map];
+ break;
+ }
+
+ hash += hash2;
+ }
+
+ strcpy(comm, saved_comm);
__raw_spin_unlock(&trace_cmdline_lock);
preempt_enable();
@@ -2470,7 +2502,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
int r;
pid = map_cmdline_to_pid[i];
- if (pid == -1 || pid == NO_CMDLINE_MAP)
+ if (!pid)
continue;
trace_find_cmdline(pid, buf_comm);
@@ -4332,8 +4364,6 @@ __init static int tracer_alloc_buffers(void)
max_tr.data[i] = &per_cpu(max_data, i);
}
- trace_init_cmdlines();
-
register_tracer(&nop_trace);
current_trace = &nop_trace;
#ifdef CONFIG_BOOT_TRACER
[-- Attachment #2: hash_comm.diff --]
[-- Type: text/plain, Size: 3491 bytes --]
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 076fa6f..d583ba5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -36,6 +36,7 @@
#include <linux/poll.h>
#include <linux/gfp.h>
#include <linux/fs.h>
+#include <linux/hash.h>
#include "trace.h"
#include "trace_output.h"
@@ -649,24 +650,15 @@ void tracing_reset_current_online_cpus(void)
tracing_reset_online_cpus(&global_trace);
}
-#define SAVED_CMDLINES 128
-#define NO_CMDLINE_MAP UINT_MAX
-static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
+#define SAVED_CMDLINE_SHIFT 8
+#define SAVED_CMDLINES (1 << SAVED_CMDLINE_SHIFT)
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
-static int cmdline_idx;
static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;
-static void trace_init_cmdlines(void)
-{
- memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
- memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
- cmdline_idx = 0;
-}
-
static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);
@@ -755,11 +747,13 @@ void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
{
- unsigned pid, idx;
+ unsigned idx;
- if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ if (!tsk->pid)
return;
+ idx = hash_32((u32)tsk->pid, SAVED_CMDLINE_SHIFT);
+
/*
* It's not the end of the world if we don't get
* the lock, but we also don't want to spin
@@ -769,27 +763,8 @@ static void trace_save_cmdline(struct task_struct *tsk)
if (!__raw_spin_trylock(&trace_cmdline_lock))
return;
- idx = map_pid_to_cmdline[tsk->pid];
- if (idx == NO_CMDLINE_MAP) {
- idx = (cmdline_idx + 1) % SAVED_CMDLINES;
-
- /*
- * Check whether the cmdline buffer at idx has a pid
- * mapped. We are going to overwrite that entry so we
- * need to clear the map_pid_to_cmdline. Otherwise we
- * would read the new comm for the old pid.
- */
- pid = map_cmdline_to_pid[idx];
- if (pid != NO_CMDLINE_MAP)
- map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
-
- map_cmdline_to_pid[idx] = tsk->pid;
- map_pid_to_cmdline[tsk->pid] = idx;
-
- cmdline_idx = idx;
- }
-
- memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
+ map_cmdline_to_pid[idx] = tsk->pid;
+ memcpy(saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
__raw_spin_unlock(&trace_cmdline_lock);
}
@@ -803,15 +778,17 @@ void trace_find_cmdline(int pid, char comm[])
return;
}
- if (pid > PID_MAX_DEFAULT) {
+ map = hash_32((u32)pid, SAVED_CMDLINE_SHIFT);
+
+ if (map_cmdline_to_pid[map] != pid) {
strcpy(comm, "<...>");
return;
}
preempt_disable();
__raw_spin_lock(&trace_cmdline_lock);
- map = map_pid_to_cmdline[pid];
- if (map != NO_CMDLINE_MAP)
+
+ if (map_cmdline_to_pid[map] == pid)
strcpy(comm, saved_cmdlines[map]);
else
strcpy(comm, "<...>");
@@ -2470,7 +2447,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
int r;
pid = map_cmdline_to_pid[i];
- if (pid == -1 || pid == NO_CMDLINE_MAP)
+ if (!pid)
continue;
trace_find_cmdline(pid, buf_comm);
@@ -4332,8 +4309,6 @@ __init static int tracer_alloc_buffers(void)
max_tr.data[i] = &per_cpu(max_data, i);
}
- trace_init_cmdlines();
-
register_tracer(&nop_trace);
current_trace = &nop_trace;
#ifdef CONFIG_BOOT_TRACER
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH] tracing: use hash table to simulate the sparse array
2009-06-22 7:09 [PATCH] tracing: rewrite trace_save_cmdline() Lai Jiangshan
@ 2009-06-30 8:47 ` Lai Jiangshan
2009-06-30 11:59 ` Frederic Weisbecker
0 siblings, 1 reply; 7+ messages in thread
From: Lai Jiangshan @ 2009-06-30 8:47 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Ingo Molnar, Frederic Weisbecker, LKML
Lai Jiangshan wrote:
>
> Subject: [PATCH] tracing: rewrite trace_save_cmdline()
>
> I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
> wastes too much memory, so I remove it.
>
> The old FIFO algorithm is replaced with a new one:
> Open address hash table with double hash + tick-LRU.
>
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
This patch reduces the memory usage (saving 128K of memory in the kernel).
But it's too complicated, and it changes the original algorithm.
This new patch does NOT change the original algorithm,
but it uses a hash table to simulate the sparse array.
---------------
Subject: [PATCH] tracing: use hash table to simulate the sparse array
I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
wastes too much memory, so I remove it.
A hash table is added to simulate the sparse array. And
map_pid_to_cmdline and map_cmdline_to_pid become light functions.
map_pid_to_cmdline[pid] ==> map_pid_to_cmdline(pid)
map_cmdline_to_pid[idx] ==> map_cmdline_to_pid(idx)
[Impact: save about 127k memory]
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3aa0a0d..3526b9c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -36,6 +36,7 @@
#include <linux/poll.h>
#include <linux/gfp.h>
#include <linux/fs.h>
+#include <linux/hash.h>
#include "trace.h"
#include "trace_output.h"
@@ -648,10 +649,47 @@ void tracing_reset_current_online_cpus(void)
tracing_reset_online_cpus(&global_trace);
}
-#define SAVED_CMDLINES 128
+#define SAVED_CMDLINES_SHIFT 7
+#define SAVED_CMDLINES (1 << 7)
#define NO_CMDLINE_MAP UINT_MAX
-static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
-static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
+
+struct cmdline_index {
+ struct hlist_node node;
+ unsigned int pid;
+};
+
+struct hlist_head map_head[SAVED_CMDLINES];
+struct cmdline_index indexes[SAVED_CMDLINES];
+
+static unsigned int map_pid_to_cmdline(unsigned int pid)
+{
+ struct cmdline_index *index;
+ struct hlist_node *n;
+ unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
+
+ hlist_for_each_entry(index, n, &map_head[hash], node) {
+ if (index->pid == pid)
+ return index - indexes;
+ }
+
+ return NO_CMDLINE_MAP;
+}
+
+static unsigned int map_cmdline_to_pid(unsigned int idx)
+{
+ return indexes[idx].pid;
+}
+
+static void do_map_cmdline_index(unsigned int idx, unsigned int pid)
+{
+ unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
+
+ if (map_cmdline_to_pid(idx) != NO_CMDLINE_MAP)
+ hlist_del(&indexes[idx].node);
+ indexes[idx].pid = pid;
+ hlist_add_head(&indexes[idx].node, &map_head[hash]);
+}
+
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
static int cmdline_idx;
static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
@@ -661,8 +699,7 @@ static atomic_t trace_record_cmdline_disabled __read_mostly;
static void trace_init_cmdlines(void)
{
- memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
- memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
+ memset(&indexes, NO_CMDLINE_MAP, sizeof(indexes));
cmdline_idx = 0;
}
@@ -754,9 +791,9 @@ void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
{
- unsigned pid, idx;
+ unsigned int idx;
- if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ if (!tsk->pid)
return;
/*
@@ -768,22 +805,11 @@ static void trace_save_cmdline(struct task_struct *tsk)
if (!__raw_spin_trylock(&trace_cmdline_lock))
return;
- idx = map_pid_to_cmdline[tsk->pid];
+ idx = map_pid_to_cmdline(tsk->pid);
if (idx == NO_CMDLINE_MAP) {
idx = (cmdline_idx + 1) % SAVED_CMDLINES;
- /*
- * Check whether the cmdline buffer at idx has a pid
- * mapped. We are going to overwrite that entry so we
- * need to clear the map_pid_to_cmdline. Otherwise we
- * would read the new comm for the old pid.
- */
- pid = map_cmdline_to_pid[idx];
- if (pid != NO_CMDLINE_MAP)
- map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
-
- map_cmdline_to_pid[idx] = tsk->pid;
- map_pid_to_cmdline[tsk->pid] = idx;
+ do_map_cmdline_index(idx, tsk->pid);
cmdline_idx = idx;
}
@@ -802,14 +828,9 @@ void trace_find_cmdline(int pid, char comm[])
return;
}
- if (pid > PID_MAX_DEFAULT) {
- strcpy(comm, "<...>");
- return;
- }
-
preempt_disable();
__raw_spin_lock(&trace_cmdline_lock);
- map = map_pid_to_cmdline[pid];
+ map = map_pid_to_cmdline(pid);
if (map != NO_CMDLINE_MAP)
strcpy(comm, saved_cmdlines[map]);
else
@@ -2458,7 +2479,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
for (i = 0; i < SAVED_CMDLINES; i++) {
int r;
- pid = map_cmdline_to_pid[i];
+ pid = map_cmdline_to_pid(i);
if (pid == -1 || pid == NO_CMDLINE_MAP)
continue;
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH] tracing: use hash table to simulate the sparse array
2009-06-30 8:47 ` [PATCH] tracing: use hash table to simulate the sparse array Lai Jiangshan
@ 2009-06-30 11:59 ` Frederic Weisbecker
2009-07-01 2:31 ` Lai Jiangshan
0 siblings, 1 reply; 7+ messages in thread
From: Frederic Weisbecker @ 2009-06-30 11:59 UTC (permalink / raw)
To: Lai Jiangshan; +Cc: Steven Rostedt, Ingo Molnar, LKML
On Tue, Jun 30, 2009 at 04:47:38PM +0800, Lai Jiangshan wrote:
> Lai Jiangshan wrote:
> >
> > Subject: [PATCH] tracing: rewrite trace_save_cmdline()
> >
> > I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
> > wastes too much memory, so I remove it.
> >
> > The old FIFO algorithm is replaced with a new one:
> > Open address hash table with double hash + tick-LRU.
> >
> > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> > ---
>
> This patch reduces the memory usage.(save 128K memory in kernel)
> But it's too complicated, and it changes the original algorithm.
>
> This new patch does NOT change the original algorithm,
> but it uses a hash table to simulate the sparse array.
>
> ---------------
>
> Subject: [PATCH] tracing: use hash table to simulate the sparse array
>
> I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
> wastes too much memory, so I remove it.
>
> A hash table is added to simulate the sparse array. And
> map_pid_to_cmdline and map_cmdline_to_pid become light functions.
>
> map_pid_to_cmdline[pid] ==> map_pid_to_cmdline(pid)
> map_cmdline_to_pid[idx] ==> map_cmdline_to_pid(idx)
>
> [Impact: save about 127k memory]
>
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 3aa0a0d..3526b9c 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -36,6 +36,7 @@
> #include <linux/poll.h>
> #include <linux/gfp.h>
> #include <linux/fs.h>
> +#include <linux/hash.h>
>
> #include "trace.h"
> #include "trace_output.h"
> @@ -648,10 +649,47 @@ void tracing_reset_current_online_cpus(void)
> tracing_reset_online_cpus(&global_trace);
> }
>
> -#define SAVED_CMDLINES 128
> +#define SAVED_CMDLINES_SHIFT 7
> +#define SAVED_CMDLINES (1 << 7)
> #define NO_CMDLINE_MAP UINT_MAX
> -static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
> -static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
> +
> +struct cmdline_index {
> + struct hlist_node node;
> + unsigned int pid;
> +};
> +
> +struct hlist_head map_head[SAVED_CMDLINES];
> +struct cmdline_index indexes[SAVED_CMDLINES];
> +
> +static unsigned int map_pid_to_cmdline(unsigned int pid)
> +{
> + struct cmdline_index *index;
> + struct hlist_node *n;
> + unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
> +
> + hlist_for_each_entry(index, n, &map_head[hash], node) {
> + if (index->pid == pid)
> + return index - indexes;
> + }
> +
> + return NO_CMDLINE_MAP;
> +}
> +
> +static unsigned int map_cmdline_to_pid(unsigned int idx)
> +{
> + return indexes[idx].pid;
> +}
> +
> +static void do_map_cmdline_index(unsigned int idx, unsigned int pid)
> +{
> + unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
> +
> + if (map_cmdline_to_pid(idx) != NO_CMDLINE_MAP)
> + hlist_del(&indexes[idx].node);
> + indexes[idx].pid = pid;
> + hlist_add_head(&indexes[idx].node, &map_head[hash]);
> +}
If I understand well, you won't ever have more than one
entry per map_head[x]
So why are you using a hashlist that supports more than one
entry (the use of hlist_head op).
You could use a simple hashlist with only one entry on each
index to map the pid.
But the background idea of your patch looks good indeed.
Thanks.
> +
> static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
> static int cmdline_idx;
> static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
> @@ -661,8 +699,7 @@ static atomic_t trace_record_cmdline_disabled __read_mostly;
>
> static void trace_init_cmdlines(void)
> {
> - memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
> - memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
> + memset(&indexes, NO_CMDLINE_MAP, sizeof(indexes));
> cmdline_idx = 0;
> }
>
> @@ -754,9 +791,9 @@ void trace_stop_cmdline_recording(void);
>
> static void trace_save_cmdline(struct task_struct *tsk)
> {
> - unsigned pid, idx;
> + unsigned int idx;
>
> - if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
> + if (!tsk->pid)
> return;
>
> /*
> @@ -768,22 +805,11 @@ static void trace_save_cmdline(struct task_struct *tsk)
> if (!__raw_spin_trylock(&trace_cmdline_lock))
> return;
>
> - idx = map_pid_to_cmdline[tsk->pid];
> + idx = map_pid_to_cmdline(tsk->pid);
> if (idx == NO_CMDLINE_MAP) {
> idx = (cmdline_idx + 1) % SAVED_CMDLINES;
>
> - /*
> - * Check whether the cmdline buffer at idx has a pid
> - * mapped. We are going to overwrite that entry so we
> - * need to clear the map_pid_to_cmdline. Otherwise we
> - * would read the new comm for the old pid.
> - */
> - pid = map_cmdline_to_pid[idx];
> - if (pid != NO_CMDLINE_MAP)
> - map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
> -
> - map_cmdline_to_pid[idx] = tsk->pid;
> - map_pid_to_cmdline[tsk->pid] = idx;
> + do_map_cmdline_index(idx, tsk->pid);
>
> cmdline_idx = idx;
> }
> @@ -802,14 +828,9 @@ void trace_find_cmdline(int pid, char comm[])
> return;
> }
>
> - if (pid > PID_MAX_DEFAULT) {
> - strcpy(comm, "<...>");
> - return;
> - }
> -
> preempt_disable();
> __raw_spin_lock(&trace_cmdline_lock);
> - map = map_pid_to_cmdline[pid];
> + map = map_pid_to_cmdline(pid);
> if (map != NO_CMDLINE_MAP)
> strcpy(comm, saved_cmdlines[map]);
> else
> @@ -2458,7 +2479,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
> for (i = 0; i < SAVED_CMDLINES; i++) {
> int r;
>
> - pid = map_cmdline_to_pid[i];
> + pid = map_cmdline_to_pid(i);
> if (pid == -1 || pid == NO_CMDLINE_MAP)
> continue;
>
>
>
>
>
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] tracing: use hash table to simulate the sparse array
2009-06-30 11:59 ` Frederic Weisbecker
@ 2009-07-01 2:31 ` Lai Jiangshan
2009-07-01 19:25 ` Frederic Weisbecker
0 siblings, 1 reply; 7+ messages in thread
From: Lai Jiangshan @ 2009-07-01 2:31 UTC (permalink / raw)
To: Frederic Weisbecker; +Cc: Steven Rostedt, Ingo Molnar, LKML
Frederic Weisbecker wrote:
> On Tue, Jun 30, 2009 at 04:47:38PM +0800, Lai Jiangshan wrote:
>> Lai Jiangshan wrote:
>>> Subject: [PATCH] tracing: rewrite trace_save_cmdline()
>>>
>>> I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
>>> wastes too much memory, so I remove it.
>>>
>>> The old FIFO algorithm is replaced with a new one:
>>> Open address hash table with double hash + tick-LRU.
>>>
>>> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
>>> ---
>> This patch reduces the memory usage.(save 128K memory in kernel)
>> But it's too complicated, and it changes the original algorithm.
>>
>> This new patch does NOT change the original algorithm,
>> but it uses a hash table to simulate the sparse array.
>>
>> ---------------
>>
>> Subject: [PATCH] tracing: use hash table to simulate the sparse array
>>
>> I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
>> wastes too much memory, so I remove it.
>>
>> A hash table is added to simulate the sparse array. And
>> map_pid_to_cmdline and map_cmdline_to_pid become light functions.
>>
>> map_pid_to_cmdline[pid] ==> map_pid_to_cmdline(pid)
>> map_cmdline_to_pid[idx] ==> map_cmdline_to_pid(idx)
>>
>> [Impact: save about 127k memory]
>>
>> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
>> ---
>> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
>> index 3aa0a0d..3526b9c 100644
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -36,6 +36,7 @@
>> #include <linux/poll.h>
>> #include <linux/gfp.h>
>> #include <linux/fs.h>
>> +#include <linux/hash.h>
>>
>> #include "trace.h"
>> #include "trace_output.h"
>> @@ -648,10 +649,47 @@ void tracing_reset_current_online_cpus(void)
>> tracing_reset_online_cpus(&global_trace);
>> }
>>
>> -#define SAVED_CMDLINES 128
>> +#define SAVED_CMDLINES_SHIFT 7
>> +#define SAVED_CMDLINES (1 << 7)
>> #define NO_CMDLINE_MAP UINT_MAX
>> -static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
>> -static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
>> +
>> +struct cmdline_index {
>> + struct hlist_node node;
>> + unsigned int pid;
>> +};
>> +
>> +struct hlist_head map_head[SAVED_CMDLINES];
>> +struct cmdline_index indexes[SAVED_CMDLINES];
>> +
>> +static unsigned int map_pid_to_cmdline(unsigned int pid)
>> +{
>> + struct cmdline_index *index;
>> + struct hlist_node *n;
>> + unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
>> +
>> + hlist_for_each_entry(index, n, &map_head[hash], node) {
>> + if (index->pid == pid)
>> + return index - indexes;
>> + }
>> +
>> + return NO_CMDLINE_MAP;
>> +}
>> +
>> +static unsigned int map_cmdline_to_pid(unsigned int idx)
>> +{
>> + return indexes[idx].pid;
>> +}
>> +
>> +static void do_map_cmdline_index(unsigned int idx, unsigned int pid)
>> +{
>> + unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
>> +
>> + if (map_cmdline_to_pid(idx) != NO_CMDLINE_MAP)
>> + hlist_del(&indexes[idx].node);
>> + indexes[idx].pid = pid;
>> + hlist_add_head(&indexes[idx].node, &map_head[hash]);
>> +}
>
>
>
> If I understand well, you won't ever have more than one
> entry per map_head[x]
The hash value of a pid determines which map_head[hash] is used.
There may be 2 pids with the same hash value. They will use
the same head map_head[hash] (but with different idx).
In that case this map_head[hash] has more than one entry.
>
> So why are you using a hashlist that supports more than one
> entry (the use of hlist_head op).
>
> You could use a simple hashlist with only one entry on each
> index to map the pid.
>
> But the background idea of your patch looks good indeed.
>
> Thanks.
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] tracing: use hash table to simulate the sparse array
2009-07-01 2:31 ` Lai Jiangshan
@ 2009-07-01 19:25 ` Frederic Weisbecker
2009-07-02 1:29 ` Lai Jiangshan
0 siblings, 1 reply; 7+ messages in thread
From: Frederic Weisbecker @ 2009-07-01 19:25 UTC (permalink / raw)
To: Lai Jiangshan; +Cc: Steven Rostedt, Ingo Molnar, LKML
On Wed, Jul 01, 2009 at 10:31:03AM +0800, Lai Jiangshan wrote:
> Frederic Weisbecker wrote:
> > On Tue, Jun 30, 2009 at 04:47:38PM +0800, Lai Jiangshan wrote:
> >> Lai Jiangshan wrote:
> >>> Subject: [PATCH] tracing: rewrite trace_save_cmdline()
> >>>
> >>> I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
> >>> wastes too much memory, so I remove it.
> >>>
> >>> The old FIFO algorithm is replaced with a new one:
> >>> Open address hash table with double hash + tick-LRU.
> >>>
> >>> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> >>> ---
> >> This patch reduces the memory usage.(save 128K memory in kernel)
> >> But it's too complicated, and it changes the original algorithm.
> >>
> >> This new patch does NOT change the original algorithm,
> >> but it uses a hash table to simulate the sparse array.
> >>
> >> ---------------
> >>
> >> Subject: [PATCH] tracing: use hash table to simulate the sparse array
> >>
> >> I found the sparse array map_pid_to_cmdline[PID_MAX_DEFAULT+1]
> >> wastes too much memory, so I remove it.
> >>
> >> A hash table is added to simulate the sparse array. And
> >> map_pid_to_cmdline and map_cmdline_to_pid become light functions.
> >>
> >> map_pid_to_cmdline[pid] ==> map_pid_to_cmdline(pid)
> >> map_cmdline_to_pid[idx] ==> map_cmdline_to_pid(idx)
> >>
> >> [Impact: save about 127k memory]
> >>
> >> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> >> ---
> >> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> >> index 3aa0a0d..3526b9c 100644
> >> --- a/kernel/trace/trace.c
> >> +++ b/kernel/trace/trace.c
> >> @@ -36,6 +36,7 @@
> >> #include <linux/poll.h>
> >> #include <linux/gfp.h>
> >> #include <linux/fs.h>
> >> +#include <linux/hash.h>
> >>
> >> #include "trace.h"
> >> #include "trace_output.h"
> >> @@ -648,10 +649,47 @@ void tracing_reset_current_online_cpus(void)
> >> tracing_reset_online_cpus(&global_trace);
> >> }
> >>
> >> -#define SAVED_CMDLINES 128
> >> +#define SAVED_CMDLINES_SHIFT 7
> >> +#define SAVED_CMDLINES (1 << 7)
> >> #define NO_CMDLINE_MAP UINT_MAX
> >> -static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
> >> -static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
> >> +
> >> +struct cmdline_index {
> >> + struct hlist_node node;
> >> + unsigned int pid;
> >> +};
> >> +
> >> +struct hlist_head map_head[SAVED_CMDLINES];
> >> +struct cmdline_index indexes[SAVED_CMDLINES];
> >> +
> >> +static unsigned int map_pid_to_cmdline(unsigned int pid)
> >> +{
> >> + struct cmdline_index *index;
> >> + struct hlist_node *n;
> >> + unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
> >> +
> >> + hlist_for_each_entry(index, n, &map_head[hash], node) {
> >> + if (index->pid == pid)
> >> + return index - indexes;
> >> + }
> >> +
> >> + return NO_CMDLINE_MAP;
> >> +}
> >> +
> >> +static unsigned int map_cmdline_to_pid(unsigned int idx)
> >> +{
> >> + return indexes[idx].pid;
> >> +}
> >> +
> >> +static void do_map_cmdline_index(unsigned int idx, unsigned int pid)
> >> +{
> >> + unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
> >> +
> >> + if (map_cmdline_to_pid(idx) != NO_CMDLINE_MAP)
> >> + hlist_del(&indexes[idx].node);
> >> + indexes[idx].pid = pid;
> >> + hlist_add_head(&indexes[idx].node, &map_head[hash]);
> >> +}
> >
> >
> >
> > If I understand well, you won't ever have more than one
> > entry per map_head[x]
>
> The hash value of a pid determines which map_head[hash] is used.
> There are maybe 2 pids with the same hash value. They will use
> the same head map_head[hash] (but with different idx).
>
> Then this map_head[hash] has more than one entry.
Hmm, I'm confused.
When you map a new pid, you do the following:
+static void do_map_cmdline_index(unsigned int idx, unsigned int pid)
+{
+ unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
+
+ if (map_cmdline_to_pid(idx) != NO_CMDLINE_MAP)
+ hlist_del(&indexes[idx].node);
+ indexes[idx].pid = pid;
+ hlist_add_head(&indexes[idx].node, &map_head[hash]);
+}
Then if there was a pid that had the same hash, it is deleted
from the hashlist and the new one steals its place, which
leads me to think you won't have more than one entry per hash.
> >
> > So why are you using a hashlist that supports more than one
> > entry (the use of hlist_head op).
> >
> > You could use a simple hashlist with only one entry on each
> > index to map the pid.
> >
> > But the background idea of your patch looks good indeed.
> >
> > Thanks.
> >
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] tracing: use hash table to simulate the sparse array
2009-07-01 19:25 ` Frederic Weisbecker
@ 2009-07-02 1:29 ` Lai Jiangshan
2009-07-06 0:56 ` Frederic Weisbecker
0 siblings, 1 reply; 7+ messages in thread
From: Lai Jiangshan @ 2009-07-02 1:29 UTC (permalink / raw)
To: Frederic Weisbecker; +Cc: Steven Rostedt, Ingo Molnar, LKML
Frederic Weisbecker wrote:
>
> Hmm, I'm confused.
> When you map a new pid, you do the following:
>
> +static void do_map_cmdline_index(unsigned int idx, unsigned int pid)
> +{
> + unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
> +
> + if (map_cmdline_to_pid(idx) != NO_CMDLINE_MAP)
> + hlist_del(&indexes[idx].node);
> + indexes[idx].pid = pid;
> + hlist_add_head(&indexes[idx].node, &map_head[hash]);
> +}
>
> Then if there was a pid that had the same hash, it is deleted
> from the hashlist and the new one steal his place, which
> lead me to think you won't have more than one entry per hash.
>
>
indexes[idx] is deleted, not "indexes[hash]".
idx is chosen by the FIFO algorithm (which I did not change).
It is the earliest mapped item, and its place is replaced by the new item.
So map_head[hash] may have 2 or more entries (with different idx).
The whole patch does one thing only:
implement map_pid_to_cmdline(pid), and make it always equal to
the original map_pid_to_cmdline[pid].
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] tracing: use hash table to simulate the sparse array
2009-07-02 1:29 ` Lai Jiangshan
@ 2009-07-06 0:56 ` Frederic Weisbecker
0 siblings, 0 replies; 7+ messages in thread
From: Frederic Weisbecker @ 2009-07-06 0:56 UTC (permalink / raw)
To: Lai Jiangshan; +Cc: Steven Rostedt, Ingo Molnar, LKML
On Thu, Jul 02, 2009 at 09:29:51AM +0800, Lai Jiangshan wrote:
> Frederic Weisbecker wrote:
> >
> > Hmm, I'm confused.
> > When you map a new pid, you do the following:
> >
> > +static void do_map_cmdline_index(unsigned int idx, unsigned int pid)
> > +{
> > + unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
> > +
> > + if (map_cmdline_to_pid(idx) != NO_CMDLINE_MAP)
> > + hlist_del(&indexes[idx].node);
> > + indexes[idx].pid = pid;
> > + hlist_add_head(&indexes[idx].node, &map_head[hash]);
> > +}
> >
> > Then if there was a pid that had the same hash, it is deleted
> > from the hashlist and the new one steals its place, which
> > leads me to think you won't have more than one entry per hash.
> >
> >
>
> indexes[idx] is deleted, not "indexes[hash]".
> idx is chosen by the FIFO algorithm (which I did not change).
> It is the earliest-mapped item; its place is taken by the new item.
>
> So map_head[hash] may have 2 or more entries (with different idx values).
>
> The whole patch does one thing only:
> it implements map_pid_to_cmdline(pid) and makes it always equal to
> the original map_pid_to_cmdline[pid].
Aah ok, I understand now.
So, I retry a review:
+
+struct cmdline_index {
+ struct hlist_node node;
+ unsigned int pid;
+};
+
+struct hlist_head map_head[SAVED_CMDLINES];
map_head is too generic as a name.
We are in trace.c, which is quite overloaded (about 4300 lines),
so global variable names, even static ones (it should be static, right?),
should be chosen carefully.
Maybe pid_to_cmdline_hashlist?
+struct cmdline_index indexes[SAVED_CMDLINES];
I would suggest actually renaming this "indexes" to
map_cmdline_to_pid.
- "indexes" is too generic. While reading what follows below,
I was confused multiple times by this name.
- the function map_cmdline_to_pid() only dereferences indexes.
It means that we could reduce it to map_cmdline_to_pid[], which
would give more readable code.
Also, it seems it should be static too.
+
+static unsigned int map_pid_to_cmdline(unsigned int pid)
+{
+ struct cmdline_index *index;
+ struct hlist_node *n;
+ unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
+
+ hlist_for_each_entry(index, n, &map_head[hash], node) {
+ if (index->pid == pid)
+ return index - indexes;
+ }
+
+ return NO_CMDLINE_MAP;
+}
+
+static unsigned int map_cmdline_to_pid(unsigned int idx)
+{
+ return indexes[idx].pid;
+}
+
+static void do_map_cmdline_index(unsigned int idx, unsigned int pid)
+{
+ unsigned int hash = hash_32(pid, SAVED_CMDLINES_SHIFT);
+
+ if (map_cmdline_to_pid(idx) != NO_CMDLINE_MAP)
+ hlist_del(&indexes[idx].node);
+ indexes[idx].pid = pid;
+ hlist_add_head(&indexes[idx].node, &map_head[hash]);
+}
+
Note that the algorithm change is not without side effects.
We are switching from an amortized constant-time access to an
unamortized one (or at least, one not as well amortized).
This function is executed in some hot paths.
I guess having more than one pid mapped to the same map_head[hash]
should be rare, but it would be better to have an Ack from Steve
before applying this patch.
Thanks,
Frederic.
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
static int cmdline_idx;
static raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED;
@@ -661,8 +699,7 @@ static atomic_t trace_record_cmdline_disabled __read_mostly;
static void trace_init_cmdlines(void)
{
- memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
- memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
+ memset(&indexes, NO_CMDLINE_MAP, sizeof(indexes));
cmdline_idx = 0;
}
@@ -754,9 +791,9 @@ void trace_stop_cmdline_recording(void);
static void trace_save_cmdline(struct task_struct *tsk)
{
- unsigned pid, idx;
+ unsigned int idx;
- if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ if (!tsk->pid)
return;
/*
@@ -768,22 +805,11 @@ static void trace_save_cmdline(struct task_struct *tsk)
if (!__raw_spin_trylock(&trace_cmdline_lock))
return;
- idx = map_pid_to_cmdline[tsk->pid];
+ idx = map_pid_to_cmdline(tsk->pid);
if (idx == NO_CMDLINE_MAP) {
idx = (cmdline_idx + 1) % SAVED_CMDLINES;
- /*
- * Check whether the cmdline buffer at idx has a pid
- * mapped. We are going to overwrite that entry so we
- * need to clear the map_pid_to_cmdline. Otherwise we
- * would read the new comm for the old pid.
- */
- pid = map_cmdline_to_pid[idx];
- if (pid != NO_CMDLINE_MAP)
- map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
-
- map_cmdline_to_pid[idx] = tsk->pid;
- map_pid_to_cmdline[tsk->pid] = idx;
+ do_map_cmdline_index(idx, tsk->pid);
cmdline_idx = idx;
}
@@ -802,14 +828,9 @@ void trace_find_cmdline(int pid, char comm[])
return;
}
- if (pid > PID_MAX_DEFAULT) {
- strcpy(comm, "<...>");
- return;
- }
-
preempt_disable();
__raw_spin_lock(&trace_cmdline_lock);
- map = map_pid_to_cmdline[pid];
+ map = map_pid_to_cmdline(pid);
if (map != NO_CMDLINE_MAP)
strcpy(comm, saved_cmdlines[map]);
else
@@ -2458,7 +2479,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
for (i = 0; i < SAVED_CMDLINES; i++) {
int r;
- pid = map_cmdline_to_pid[i];
+ pid = map_cmdline_to_pid(i);
if (pid == -1 || pid == NO_CMDLINE_MAP)
continue;
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2009-07-06 0:56 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-06-22 7:09 [PATCH] tracing: rewrite trace_save_cmdline() Lai Jiangshan
2009-06-30 8:47 ` [PATCH] tracing: use hash table to simulate the sparse array Lai Jiangshan
2009-06-30 11:59 ` Frederic Weisbecker
2009-07-01 2:31 ` Lai Jiangshan
2009-07-01 19:25 ` Frederic Weisbecker
2009-07-02 1:29 ` Lai Jiangshan
2009-07-06 0:56 ` Frederic Weisbecker
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).