* [PATCH 5/6] tracing: reduce latency and remove percpu trace_seq
@ 2010-01-19 7:34 Lai Jiangshan
2010-01-20 19:22 ` Frederic Weisbecker
0 siblings, 1 reply; 4+ messages in thread
From: Lai Jiangshan @ 2010-01-19 7:34 UTC (permalink / raw)
To: Steven Rostedt, linux-kernel, Ingo Molnar, Andrew Morton,
Frederic Weisbecker
__print_flags() and __print_symbolic() use percpu trace_seq:
1) Its memory is preallocated, so it wastes memory when we don't use tracing.
2) It wastes memory on multi-CPU systems.
3) It disables preemption while it executes its core routine
"trace_seq_printf(s, "%s: ", #call);" and introduces latency
for more important processes.
So we move this trace_seq to struct trace_iterator.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index be9ece5..348500d 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -12,9 +12,6 @@ struct dentry;
#define FTRACE_SEQ_BUFSIZE PAGE_SIZE
-DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
-DECLARE_PER_CPU(unsigned char[FTRACE_SEQ_BUFSIZE], ftrace_event_buffer);
-
struct trace_print_flags {
unsigned long mask;
const char *name;
@@ -60,6 +57,10 @@ struct trace_iterator {
struct trace_seq seq;
unsigned char buffer[FTRACE_SEQ_BUFSIZE];
+ /* trace_seq for __print_flags() and __print_symbolic() */
+ struct trace_seq tmp_seq;
+ unsigned char tmp_buffer[FTRACE_SEQ_BUFSIZE];
+
struct trace_entry *ent;
int leftover;
int cpu;
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index f30f4d6..4807d1e 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -141,8 +141,7 @@
* struct trace_seq *s = &iter->seq;
* struct ftrace_raw_<call> *field; <-- defined in stage 1
* struct trace_entry *entry;
- * struct trace_seq *p;
- * unsigned char *buffer;
+ * struct trace_seq *p = &iter->tmp_seq;
* int ret;
*
* entry = iter->ent;
@@ -154,11 +153,8 @@
*
* field = (typeof(field))entry;
*
- * p = get_cpu_var(ftrace_event_seq);
- * buffer = get_cpu_var(ftrace_event_buffer);
- * trace_seq_init(p, buffer, FTRACE_SEQ_BUFSIZE);
+ * trace_seq_init(p, iter->tmp_buffer, FTRACE_SEQ_BUFSIZE);
* ret = trace_seq_printf(s, <TP_printk> "\n");
- * put_cpu();
* if (!ret)
* return TRACE_TYPE_PARTIAL_LINE;
*
@@ -208,10 +204,8 @@ ftrace_raw_output_id_##call(int event_id, const char *name, \
struct trace_seq *s = &iter->seq; \
struct ftrace_raw_##call *field; \
struct trace_entry *entry; \
- struct trace_seq *p; \
- unsigned char *buffer; \
+ struct trace_seq *p = &iter->tmp_seq; \
int ret; \
- int cpu; \
\
entry = iter->ent; \
\
@@ -222,14 +216,10 @@ ftrace_raw_output_id_##call(int event_id, const char *name, \
\
field = (typeof(field))entry; \
\
- cpu = get_cpu(); \
- p = &per_cpu(ftrace_event_seq, cpu); \
- buffer = per_cpu(ftrace_event_buffer, cpu); \
- trace_seq_init(p, buffer, FTRACE_SEQ_BUFSIZE); \
+ trace_seq_init(p, iter->tmp_buffer, FTRACE_SEQ_BUFSIZE); \
ret = trace_seq_printf(s, "%s: ", name); \
if (ret) \
ret = trace_seq_printf(s, print); \
- put_cpu(); \
if (!ret) \
return TRACE_TYPE_PARTIAL_LINE; \
\
@@ -253,10 +243,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
struct trace_seq *s = &iter->seq; \
struct ftrace_raw_##template *field; \
struct trace_entry *entry; \
- struct trace_seq *p; \
- unsigned char *buffer; \
+ struct trace_seq *p = &iter->tmp_seq; \
int ret; \
- int cpu; \
\
entry = iter->ent; \
\
@@ -267,14 +255,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
\
field = (typeof(field))entry; \
\
- cpu = get_cpu(); \
- p = &per_cpu(ftrace_event_seq, cpu); \
- buffer = per_cpu(ftrace_event_buffer, cpu); \
- trace_seq_init(p, buffer, FTRACE_SEQ_BUFSIZE); \
+ trace_seq_init(p, iter->tmp_buffer, FTRACE_SEQ_BUFSIZE); \
ret = trace_seq_printf(s, "%s: ", #call); \
if (ret) \
ret = trace_seq_printf(s, print); \
- put_cpu(); \
if (!ret) \
return TRACE_TYPE_PARTIAL_LINE; \
\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 78f9825..f531a16 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -16,11 +16,6 @@
DECLARE_RWSEM(trace_event_mutex);
-DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
-DEFINE_PER_CPU(unsigned char[PAGE_SIZE], ftrace_event_buffer);
-EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
-EXPORT_PER_CPU_SYMBOL(ftrace_event_buffer);
-
static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
static int next_event_type = __TRACE_LAST_TYPE + 1;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 5/6] tracing: reduce latency and remove percpu trace_seq
2010-01-19 7:34 [PATCH 5/6] tracing: reduce latency and remove percpu trace_seq Lai Jiangshan
@ 2010-01-20 19:22 ` Frederic Weisbecker
2010-01-26 3:07 ` Lai Jiangshan
0 siblings, 1 reply; 4+ messages in thread
From: Frederic Weisbecker @ 2010-01-20 19:22 UTC (permalink / raw)
To: Lai Jiangshan; +Cc: Steven Rostedt, linux-kernel, Ingo Molnar, Andrew Morton
On Tue, Jan 19, 2010 at 03:34:22PM +0800, Lai Jiangshan wrote:
>
> __print_flags() and __print_symbolic() use percpu trace_seq:
>
> 1) Its memory is preallocated, it wastes memory when we don't use tracing.
> 2) It wastes memory for multi-cpus system.
> 3) It disables preemption when it executes its core routine
> "trace_seq_printf(s, "%s: ", #call);" and introduce latency
> for more important process.
>
> So we move this trace_seq to struct trace_iterator.
>
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
> index be9ece5..348500d 100644
> --- a/include/linux/ftrace_event.h
> +++ b/include/linux/ftrace_event.h
> @@ -12,9 +12,6 @@ struct dentry;
>
> #define FTRACE_SEQ_BUFSIZE PAGE_SIZE
>
> -DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
> -DECLARE_PER_CPU(unsigned char[FTRACE_SEQ_BUFSIZE], ftrace_event_buffer);
> -
> struct trace_print_flags {
> unsigned long mask;
> const char *name;
> @@ -60,6 +57,10 @@ struct trace_iterator {
> struct trace_seq seq;
> unsigned char buffer[FTRACE_SEQ_BUFSIZE];
>
> + /* trace_seq for __print_flags() and __print_symbolic() */
> + struct trace_seq tmp_seq;
> + unsigned char tmp_buffer[FTRACE_SEQ_BUFSIZE];
Well, I don't much like that, because it's a temporary buffer
in the trace iterator that is only used by a few events.
But the problem is indeed tricky.
Maybe we should use a kmalloc in raw_output?
We could pass a trace_seq without a buffer to ftrace_print_flags_seq,
which could allocate the buffer and then free it afterwards?
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 5/6] tracing: reduce latency and remove percpu trace_seq
2010-01-20 19:22 ` Frederic Weisbecker
@ 2010-01-26 3:07 ` Lai Jiangshan
2010-01-30 21:24 ` Frederic Weisbecker
0 siblings, 1 reply; 4+ messages in thread
From: Lai Jiangshan @ 2010-01-26 3:07 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Steven Rostedt, linux-kernel, Ingo Molnar, Andrew Morton
Frederic Weisbecker wrote:
> On Tue, Jan 19, 2010 at 03:34:22PM +0800, Lai Jiangshan wrote:
>> __print_flags() and __print_symbolic() use percpu trace_seq:
>>
>> 1) Its memory is preallocated, it wastes memory when we don't use tracing.
>> 2) It wastes memory for multi-cpus system.
>> 3) It disables preemption when it executes its core routine
>> "trace_seq_printf(s, "%s: ", #call);" and introduce latency
>> for more important process.
>>
>> So we move this trace_seq to struct trace_iterator.
>>
>> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
>> ---
>> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
>> index be9ece5..348500d 100644
>> --- a/include/linux/ftrace_event.h
>> +++ b/include/linux/ftrace_event.h
>> @@ -12,9 +12,6 @@ struct dentry;
>>
>> #define FTRACE_SEQ_BUFSIZE PAGE_SIZE
>>
>> -DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
>> -DECLARE_PER_CPU(unsigned char[FTRACE_SEQ_BUFSIZE], ftrace_event_buffer);
>> -
>> struct trace_print_flags {
>> unsigned long mask;
>> const char *name;
>> @@ -60,6 +57,10 @@ struct trace_iterator {
>> struct trace_seq seq;
>> unsigned char buffer[FTRACE_SEQ_BUFSIZE];
>>
>> + /* trace_seq for __print_flags() and __print_symbolic() */
>> + struct trace_seq tmp_seq;
>> + unsigned char tmp_buffer[FTRACE_SEQ_BUFSIZE];
>
>
>
>
> Well, I don't like much that because it's a temporary buffer
> in trace iter only used by few events.
> But the problem is indeed tricky.
>
> May be should we use a kmalloc in raw_output?
>
But we have to preallocate it before raw_output().
A kmalloc in raw_output() makes ftrace_dump() unhappy.
On real systems, tracepoints are used more frequently,
so it is not "only used by few events."
But maybe FTRACE_SEQ_BUFSIZE is too large; 128 is enough.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 5/6] tracing: reduce latency and remove percpu trace_seq
2010-01-26 3:07 ` Lai Jiangshan
@ 2010-01-30 21:24 ` Frederic Weisbecker
0 siblings, 0 replies; 4+ messages in thread
From: Frederic Weisbecker @ 2010-01-30 21:24 UTC (permalink / raw)
To: Lai Jiangshan; +Cc: Steven Rostedt, linux-kernel, Ingo Molnar, Andrew Morton
On Tue, Jan 26, 2010 at 11:07:05AM +0800, Lai Jiangshan wrote:
> Frederic Weisbecker wrote:
> > On Tue, Jan 19, 2010 at 03:34:22PM +0800, Lai Jiangshan wrote:
> >> __print_flags() and __print_symbolic() use percpu trace_seq:
> >>
> >> 1) Its memory is preallocated, it wastes memory when we don't use tracing.
> >> 2) It wastes memory for multi-cpus system.
> >> 3) It disables preemption when it executes its core routine
> >> "trace_seq_printf(s, "%s: ", #call);" and introduce latency
> >> for more important process.
> >>
> >> So we move this trace_seq to struct trace_iterator.
> >>
> >> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> >> ---
> >> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
> >> index be9ece5..348500d 100644
> >> --- a/include/linux/ftrace_event.h
> >> +++ b/include/linux/ftrace_event.h
> >> @@ -12,9 +12,6 @@ struct dentry;
> >>
> >> #define FTRACE_SEQ_BUFSIZE PAGE_SIZE
> >>
> >> -DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
> >> -DECLARE_PER_CPU(unsigned char[FTRACE_SEQ_BUFSIZE], ftrace_event_buffer);
> >> -
> >> struct trace_print_flags {
> >> unsigned long mask;
> >> const char *name;
> >> @@ -60,6 +57,10 @@ struct trace_iterator {
> >> struct trace_seq seq;
> >> unsigned char buffer[FTRACE_SEQ_BUFSIZE];
> >>
> >> + /* trace_seq for __print_flags() and __print_symbolic() */
> >> + struct trace_seq tmp_seq;
> >> + unsigned char tmp_buffer[FTRACE_SEQ_BUFSIZE];
> >
> >
> >
> >
> > Well, I don't like much that because it's a temporary buffer
> > in trace iter only used by few events.
> > But the problem is indeed tricky.
> >
> > May be should we use a kmalloc in raw_output?
> >
>
> But we have to preallocate it before raw_output().
> a kmalloc in raw_output make ftrace_dump() unhappy.
Ah, right.
>
> At real system, tracepoints are used more frequently,
> So it is not "only used by few events."
No, I mean that print_flags and print_symbolic are used by only
a few events.
> But maybe FTRACE_SEQ_BUFSIZE is too large, 128 is enough.
Maybe. Anyway, we should perhaps indeed make this change.
Steve?
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2010-01-30 21:24 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-19 7:34 [PATCH 5/6] tracing: reduce latency and remove percpu trace_seq Lai Jiangshan
2010-01-20 19:22 ` Frederic Weisbecker
2010-01-26 3:07 ` Lai Jiangshan
2010-01-30 21:24 ` Frederic Weisbecker
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox