* [PATCH 1/3][RFC] tracing: Add time stamp normalize to ring buffer clock selection
2009-11-12 5:43 [PATCH 0/3][RFC] tracing/x86: split sched_clock in recording trace time stamps Steven Rostedt
@ 2009-11-12 5:43 ` Steven Rostedt
2009-11-12 5:43 ` [PATCH 2/3][RFC] tracing: Make the trace_clock_local and trace_normalize_local weak Steven Rostedt
2009-11-12 5:43 ` [PATCH 3/3][RFC] tracing: Separate out x86 time stamp reading and ns conversion Steven Rostedt
2 siblings, 0 replies; 5+ messages in thread
From: Steven Rostedt @ 2009-11-12 5:43 UTC (permalink / raw)
To: linux-kernel, Thomas Gleixner, Peter Zijlstra,
Frederic Weisbecker, Mathieu Desnoyers
[-- Attachment #1: 0001-tracing-Add-time-stamp-normalize-to-ring-buffer-cloc.patch --]
[-- Type: text/plain, Size: 4497 bytes --]
From: Steven Rostedt <srostedt@redhat.com>
The ring buffer allows for selecting the clock to use for tracing
but does not allow for selecting the normalize function for
that clock.
This patch updates both the ring buffer and the tracing
files that select clocks for the ring buffer, so that they also
select a normalizer for the clock.
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
include/linux/ring_buffer.h | 3 ++-
include/linux/trace_clock.h | 2 ++
kernel/trace/ring_buffer.c | 9 ++++++++-
kernel/trace/trace.c | 18 ++++++++++++++----
kernel/trace/trace_clock.c | 12 ++++++++++++
5 files changed, 38 insertions(+), 6 deletions(-)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 5fcc31e..dd1aa89 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -170,7 +170,8 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
int cpu, u64 *ts);
void ring_buffer_set_clock(struct ring_buffer *buffer,
- u64 (*clock)(void));
+ u64 (*clock)(void),
+ void (*normalize)(int cpu, u64 *ts));
size_t ring_buffer_page_len(void *page);
diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
index 7a81303..5ce4f17 100644
--- a/include/linux/trace_clock.h
+++ b/include/linux/trace_clock.h
@@ -16,4 +16,6 @@ extern u64 notrace trace_clock_local(void);
extern u64 notrace trace_clock(void);
extern u64 notrace trace_clock_global(void);
+extern void trace_normalize_local(int cpu, u64 *ts);
+
#endif /* _LINUX_TRACE_CLOCK_H */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3ffa502..34f9b28 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -455,6 +455,7 @@ struct ring_buffer {
struct notifier_block cpu_notify;
#endif
u64 (*clock)(void);
+ void (*normalize)(int, u64 *);
};
struct ring_buffer_iter {
@@ -506,6 +507,10 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
{
/* Just stupid testing the normalize function and deltas */
*ts >>= DEBUG_SHIFT;
+
+ /* Any locking must be done by the caller */
+ if (buffer->normalize)
+ buffer->normalize(cpu, ts);
}
EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp);
@@ -1176,9 +1181,11 @@ ring_buffer_free(struct ring_buffer *buffer)
EXPORT_SYMBOL_GPL(ring_buffer_free);
void ring_buffer_set_clock(struct ring_buffer *buffer,
- u64 (*clock)(void))
+ u64 (*clock)(void),
+ void (*normalize)(int cpu, u64 *ts))
{
buffer->clock = clock;
+ buffer->normalize = normalize;
}
static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9d3067a..723935b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -330,10 +330,18 @@ static const char *trace_options[] = {
static struct {
u64 (*func)(void);
+ void (*normalize)(int, u64 *);
const char *name;
} trace_clocks[] = {
- { trace_clock_local, "local" },
- { trace_clock_global, "global" },
+ {
+ .func = trace_clock_local,
+ .normalize = trace_normalize_local,
+ .name = "local"
+ },
+ {
+ .func = trace_clock_global,
+ .name = "global"
+ },
};
int trace_clock_id;
@@ -3409,9 +3417,11 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
mutex_lock(&trace_types_lock);
- ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func);
+ ring_buffer_set_clock(global_trace.buffer, trace_clocks[i].func,
+ trace_clocks[i].normalize);
if (max_tr.buffer)
- ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func);
+ ring_buffer_set_clock(max_tr.buffer, trace_clocks[i].func,
+ trace_clocks[i].normalize);
mutex_unlock(&trace_types_lock);
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 878c03f..168bf59 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -46,6 +46,18 @@ u64 notrace trace_clock_local(void)
}
/*
+ * trace_normalize_local(): Normalize trace_clock_local
+ * @cpu: cpu that the trace_clock_local was executed on
+ * @ts: the timestamp result from trace_clock_local
+ *
+ * Normalize the trace_clock_local value.
+ */
+void notrace trace_normalize_local(int cpu, u64 *ts)
+{
+ /* nop */
+}
+
+/*
* trace_clock(): 'inbetween' trace clock. Not completely serialized,
* but not completely incorrect when crossing CPUs either.
*
--
1.6.5
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/3][RFC] tracing: Make the trace_clock_local and trace_normalize_local weak
2009-11-12 5:43 [PATCH 0/3][RFC] tracing/x86: split sched_clock in recording trace time stamps Steven Rostedt
2009-11-12 5:43 ` [PATCH 1/3][RFC] tracing: Add time stamp normalize to ring buffer clock selection Steven Rostedt
@ 2009-11-12 5:43 ` Steven Rostedt
2009-11-12 5:43 ` [PATCH 3/3][RFC] tracing: Separate out x86 time stamp reading and ns conversion Steven Rostedt
2 siblings, 0 replies; 5+ messages in thread
From: Steven Rostedt @ 2009-11-12 5:43 UTC (permalink / raw)
To: linux-kernel, Thomas Gleixner, Peter Zijlstra,
Frederic Weisbecker, Mathieu Desnoyers
[-- Attachment #1: 0002-tracing-Make-the-trace_clock_local-and-trace_normali.patch --]
[-- Type: text/plain, Size: 1727 bytes --]
From: Steven Rostedt <srostedt@redhat.com>
The function trace_clock_local uses sched_clock for taking the time
stamp. For some archs this is not the most efficient method.
Making the trace_clock_local and trace_normalize_local functions weak
allows archs to override how they are defined.
This patch also removes some "notrace" annotations from the trace_clock.c
file since the entire trace directory has the -pg option removed
from compiling.
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/trace_clock.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 168bf59..2b21f61 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -28,7 +28,7 @@
* Useful for tracing that does not cross to other CPUs nor
* does it go through idle events.
*/
-u64 notrace trace_clock_local(void)
+u64 __weak trace_clock_local(void)
{
u64 clock;
int resched;
@@ -52,7 +52,7 @@ u64 notrace trace_clock_local(void)
*
* Normalize the trace_clock_local value.
*/
-void notrace trace_normalize_local(int cpu, u64 *ts)
+void __weak trace_normalize_local(int cpu, u64 *ts)
{
/* nop */
}
@@ -65,7 +65,7 @@ void notrace trace_normalize_local(int cpu, u64 *ts)
* jitter between CPUs. So it's a pretty scalable clock, but there
* can be offsets in the trace data.
*/
-u64 notrace trace_clock(void)
+u64 trace_clock(void)
{
return cpu_clock(raw_smp_processor_id());
}
@@ -89,7 +89,7 @@ static struct {
.lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED,
};
-u64 notrace trace_clock_global(void)
+u64 trace_clock_global(void)
{
unsigned long flags;
int this_cpu;
--
1.6.5
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 3/3][RFC] tracing: Separate out x86 time stamp reading and ns conversion
2009-11-12 5:43 [PATCH 0/3][RFC] tracing/x86: split sched_clock in recording trace time stamps Steven Rostedt
2009-11-12 5:43 ` [PATCH 1/3][RFC] tracing: Add time stamp normalize to ring buffer clock selection Steven Rostedt
2009-11-12 5:43 ` [PATCH 2/3][RFC] tracing: Make the trace_clock_local and trace_normalize_local weak Steven Rostedt
@ 2009-11-12 5:43 ` Steven Rostedt
2009-11-12 8:53 ` Justin P. Mattock
2 siblings, 1 reply; 5+ messages in thread
From: Steven Rostedt @ 2009-11-12 5:43 UTC (permalink / raw)
To: linux-kernel, Thomas Gleixner, Peter Zijlstra,
Frederic Weisbecker, Mathieu Desnoyers
[-- Attachment #1: 0003-tracing-Separate-out-x86-time-stamp-reading-and-ns-c.patch --]
[-- Type: text/plain, Size: 4373 bytes --]
From: Steven Rostedt <srostedt@redhat.com>
This patch separates the trace_clock_local time stamp reading from
the conversion to nanoseconds for x86 by overriding the trace_clock_local
and trace_normalize_local functions. It uses the time stamp normalize
feature of the ring buffer to allow the ring buffer to record
the raw cycles and have the read side convert it to nanoseconds.
Before this separation, the cost of a trace was 179 ns, after it is
149 ns (a 30 ns, or roughly 17%, performance improvement).
perf top before separation:
------------------------------------------------------------------------------
PerfTop: 1002 irqs/sec kernel:100.0% [1000Hz cpu-clock-msecs], (all, 4 CPUs)
------------------------------------------------------------------------------
samples pcnt kernel function
_______ _____ _______________
1653.00 - 25.0% : sched_clock
1147.00 - 17.4% : rb_reserve_next_event
865.00 - 13.1% : ring_buffer_lock_reserve
628.00 - 9.5% : rb_end_commit
521.00 - 7.9% : ring_buffer_unlock_commit
481.00 - 7.3% : __rb_reserve_next
392.00 - 5.9% : debug_smp_processor_id
284.00 - 4.3% : trace_clock_local
270.00 - 4.1% : ring_buffer_producer_thread [ring_buffer_benchmark]
108.00 - 1.6% : ring_buffer_event_data
100.00 - 1.5% : trace_recursive_unlock
70.00 - 1.1% : _spin_unlock_irq
30.00 - 0.5% : do_gettimeofday
21.00 - 0.3% : tick_nohz_stop_sched_tick
18.00 - 0.3% : read_tsc
and after:
------------------------------------------------------------------------------
PerfTop: 1024 irqs/sec kernel:100.0% [1000Hz cpu-clock-msecs], (all, 4 CPUs)
------------------------------------------------------------------------------
samples pcnt kernel function
_______ _____ _______________
1595.00 - 19.9% : rb_reserve_next_event
1521.00 - 18.9% : trace_clock_local
1393.00 - 17.3% : ring_buffer_lock_reserve
864.00 - 10.8% : __rb_reserve_next
745.00 - 9.3% : rb_end_commit
736.00 - 9.2% : ring_buffer_unlock_commit
395.00 - 4.9% : ring_buffer_producer_thread [ring_buffer_benchmark]
256.00 - 3.2% : debug_smp_processor_id
188.00 - 2.3% : ring_buffer_event_data
179.00 - 2.2% : trace_recursive_unlock
52.00 - 0.6% : _spin_unlock_irq
38.00 - 0.5% : read_tsc
34.00 - 0.4% : do_gettimeofday
27.00 - 0.3% : getnstimeofday
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
arch/x86/kernel/tsc.c | 35 +++++++++++++++++++++++++++++++++++
1 files changed, 35 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cd982f4..c6576f2 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -37,6 +37,7 @@ static int __read_mostly tsc_unstable;
static int __read_mostly tsc_disabled = -1;
static int tsc_clocksource_reliable;
+
/*
* Scheduler clock - returns current time in nanosec units.
*/
@@ -64,6 +65,40 @@ u64 native_sched_clock(void)
return __cycles_2_ns(this_offset);
}
+u64 trace_clock_local(void)
+{
+ u64 this_offset;
+
+ /*
+ * Fall back to jiffies if there's no TSC available:
+ * ( But note that we still use it if the TSC is marked
+ * unstable. We do this because unlike Time Of Day,
+ * the scheduler clock tolerates small errors and it's
+ * very important for it to be as fast as the platform
+ * can achieve it. )
+ */
+ if (unlikely(tsc_disabled))
+ /* No locking but a rare wrong value is not a big deal: */
+ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+
+ /* read the Time Stamp Counter: */
+ rdtscll(this_offset);
+
+ return this_offset;
+}
+
+void trace_normalize_local(int cpu, u64 *ts)
+{
+ unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
+ unsigned long long cyc = *ts;
+
+ if (unlikely(tsc_disabled))
+ return;
+
+ ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR;
+ *ts = ns;
+}
+
/* We need to define a real function for sched_clock, to override the
weak default version */
#ifdef CONFIG_PARAVIRT
--
1.6.5
^ permalink raw reply related [flat|nested] 5+ messages in thread