public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] [GIT PULL] tracing/ringbuffer: updates for tip
@ 2009-05-01  2:22 Steven Rostedt
  2009-05-01  2:22 ` [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries Steven Rostedt
                   ` (2 more replies)
  0 siblings, 3 replies; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01  2:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker


Ingo,

Here's some updates to the ring buffer. Mainly, exporting the
counters (and adding new ones) to userspace. We keep track of entries
and overruns but currently they only get displayed with the latency
format.

This adds them per cpu in the directory:

  /debugfs/tracing/per_cpu/cpuX/stats


Please pull the latest tip/tracing/ftrace tree, which can be found at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace.git
tip/tracing/ftrace


Steven Rostedt (3):
      ring-buffer: add counters for commit overrun and nmi dropped entries
      tracing: export stats of ring buffers to userspace
      ring-buffer: make cpu buffer entries counter atomic

----
 include/linux/ring_buffer.h |    2 +
 kernel/trace/ring_buffer.c  |   70 +++++++++++++++++++++++++++++++++++-------
 kernel/trace/trace.c        |   42 +++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 12 deletions(-)
-- 

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries
  2009-05-01  2:22 [PATCH 0/3] [GIT PULL] tracing/ringbuffer: updates for tip Steven Rostedt
@ 2009-05-01  2:22 ` Steven Rostedt
  2009-05-01  3:00   ` Andrew Morton
  2009-05-01  2:22 ` [PATCH 2/3] tracing: export stats of ring buffers to userspace Steven Rostedt
  2009-05-01  2:22 ` [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic Steven Rostedt
  2 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01  2:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker

[-- Attachment #1: 0001-ring-buffer-add-counters-for-commit-overrun-and-nmi.patch --]
[-- Type: text/plain, Size: 4485 bytes --]

From: Steven Rostedt <srostedt@redhat.com>

The WARN_ON in the ring buffer when a commit is preempted and the
buffer is filled by preceding writes can happen in normal operations.
The WARN_ON makes it look like a bug, not to mention, because
it does not stop tracing and calls printk which can also recurse, this
is prone to deadlock (the WARN_ON is not in a position to recurse).

This patch removes the WARN_ON and replaces it with a counter that
can be retrieved by a tracer. This counter is called commit_overrun.

While at it, I added a nmi_dropped counter to count any time an NMI entry
is dropped because the NMI could not take the spinlock.

[ Impact: prevent deadlock by printing normal case warning ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |    2 +
 kernel/trace/ring_buffer.c  |   52 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 1c2f809..f134582 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -153,6 +153,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer);
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f4cc590..dc8b2ab 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -402,6 +402,8 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			nmi_dropped;
+	unsigned long			commit_overrun;
 	unsigned long			overrun;
 	unsigned long			entries;
 	u64				write_stamp;
@@ -1216,8 +1218,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		 * simply fail.
 		 */
 		if (unlikely(in_nmi())) {
-			if (!__raw_spin_trylock(&cpu_buffer->lock))
+			if (!__raw_spin_trylock(&cpu_buffer->lock)) {
+				cpu_buffer->nmi_dropped++;
 				goto out_reset;
+			}
 		} else
 			__raw_spin_lock(&cpu_buffer->lock);
 
@@ -1238,8 +1242,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		 * about it.
 		 */
 		if (unlikely(next_page == commit_page)) {
-			/* This can easily happen on small ring buffers */
-			WARN_ON_ONCE(buffer->pages > 2);
+			cpu_buffer->commit_overrun++;
 			goto out_reset;
 		}
 
@@ -1926,6 +1929,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
 /**
+ * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->nmi_dropped;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
+
+/**
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ */
+unsigned long
+ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long ret;
+
+	if (!cpumask_test_cpu(cpu, buffer->cpumask))
+		return 0;
+
+	cpu_buffer = buffer->buffers[cpu];
+	ret = cpu_buffer->commit_overrun;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
+
+/**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
  *
@@ -2595,6 +2639,8 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->reader_page->page->commit, 0);
 	cpu_buffer->reader_page->read = 0;
 
+	cpu_buffer->nmi_dropped = 0;
+	cpu_buffer->commit_overrun = 0;
 	cpu_buffer->overrun = 0;
 	cpu_buffer->entries = 0;
 
-- 
1.6.2.1

-- 

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 2/3] tracing: export stats of ring buffers to userspace
  2009-05-01  2:22 [PATCH 0/3] [GIT PULL] tracing/ringbuffer: updates for tip Steven Rostedt
  2009-05-01  2:22 ` [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries Steven Rostedt
@ 2009-05-01  2:22 ` Steven Rostedt
  2009-05-01  3:08   ` Frederic Weisbecker
  2009-05-01  2:22 ` [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic Steven Rostedt
  2 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01  2:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker

[-- Attachment #1: 0002-tracing-export-stats-of-ring-buffers-to-userspace.patch --]
[-- Type: text/plain, Size: 2628 bytes --]

From: Steven Rostedt <srostedt@redhat.com>

This patch adds stats to the ftrace ring buffers:

 # cat /debugfs/tracing/per_cpu/cpu0/stats
 entries: 42360
 overrun: 30509326
 commit overrun: 0
 nmi dropped: 0

Where entries are the total number of data entries in the buffer.

overrun is the number of entries not consumed and were overwritten by
the writer.

commit overrun is the number of entries dropped due to nested writers
wrapping the buffer before the initial writer finished the commit.

nmi dropped is the number of entries dropped due to the ring buffer
lock being held when an nmi was going to write to the ring buffer.
Note, this field will be meaningless and will go away when the ring
buffer becomes lockless.

[ Impact: let userspace know what is happening in the ring buffers ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c |   42 ++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 42 insertions(+), 0 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f5427e0..74df029 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3595,6 +3595,45 @@ static const struct file_operations tracing_buffers_fops = {
 	.llseek		= no_llseek,
 };
 
+static ssize_t
+tracing_stats_read(struct file *filp, char __user *ubuf,
+		   size_t count, loff_t *ppos)
+{
+	unsigned long cpu = (unsigned long)filp->private_data;
+	struct trace_array *tr = &global_trace;
+	struct trace_seq *s;
+	unsigned long cnt;
+
+	s = kmalloc(sizeof(*s), GFP_ATOMIC);
+	if (!s)
+		return ENOMEM;
+
+	trace_seq_init(s);
+
+	cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "entries: %ld\n", cnt);
+
+	cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "overrun: %ld\n", cnt);
+
+	cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
+
+	cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
+	trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
+
+	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
+
+	kfree(s);
+
+	return count;
+}
+
+static const struct file_operations tracing_stats_fops = {
+	.open		= tracing_open_generic,
+	.read		= tracing_stats_read,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3708,6 +3747,9 @@ static void tracing_init_debugfs_percpu(long cpu)
 
 	trace_create_file("trace_pipe_raw", 0444, d_cpu,
 			(void *) cpu, &tracing_buffers_fops);
+
+	trace_create_file("stats", 0444, d_cpu,
+			(void *) cpu, &tracing_stats_fops);
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
-- 
1.6.2.1

-- 

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01  2:22 [PATCH 0/3] [GIT PULL] tracing/ringbuffer: updates for tip Steven Rostedt
  2009-05-01  2:22 ` [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries Steven Rostedt
  2009-05-01  2:22 ` [PATCH 2/3] tracing: export stats of ring buffers to userspace Steven Rostedt
@ 2009-05-01  2:22 ` Steven Rostedt
  2009-05-01  3:06   ` Andrew Morton
  2009-05-01 11:50   ` Ingo Molnar
  2 siblings, 2 replies; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01  2:22 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker

[-- Attachment #1: 0003-ring-buffer-make-cpu-buffer-entries-counter-atomic.patch --]
[-- Type: text/plain, Size: 3245 bytes --]

From: Steven Rostedt <srostedt@redhat.com>

The entries counter in cpu buffer is not atomic. Although it only gets
updated by a single CPU, interrupts may come in and update the counter
too. This would cause missing entries to be added.

[ Impact: keep accurate count of cpu buffer entries ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dc8b2ab..3b9b32b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -405,7 +405,7 @@ struct ring_buffer_per_cpu {
 	unsigned long			nmi_dropped;
 	unsigned long			commit_overrun;
 	unsigned long			overrun;
-	unsigned long			entries;
+	atomic_t			entries;
 	u64				write_stamp;
 	u64				read_stamp;
 	atomic_t			record_disabled;
@@ -997,7 +997,7 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
 		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 			continue;
 		cpu_buffer->overrun++;
-		cpu_buffer->entries--;
+		atomic_dec(&cpu_buffer->entries);
 	}
 }
 
@@ -1588,7 +1588,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
 		      struct ring_buffer_event *event)
 {
-	cpu_buffer->entries++;
+	atomic_inc(&cpu_buffer->entries);
 
 	/* Only process further if we own the commit */
 	if (!rb_is_commit(cpu_buffer, event))
@@ -1722,7 +1722,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
 	 * The commit is still visible by the reader, so we
 	 * must increment entries.
 	 */
-	cpu_buffer->entries++;
+	atomic_inc(&cpu_buffer->entries);
  out:
 	/*
 	 * If a write came in and pushed the tail page
@@ -1902,7 +1902,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
-	ret = cpu_buffer->entries;
+	ret = atomic_read(&cpu_buffer->entries);
 
 	return ret;
 }
@@ -1985,7 +1985,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
 	/* if you care about this being correct, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
-		entries += cpu_buffer->entries;
+		entries += atomic_read(&cpu_buffer->entries);
 	}
 
 	return entries;
@@ -2225,7 +2225,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 
 	if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 			|| rb_discarded_event(event))
-		cpu_buffer->entries--;
+		atomic_dec(&cpu_buffer->entries);
 
 	rb_update_read_stamp(cpu_buffer, event);
 
@@ -2642,7 +2642,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->nmi_dropped = 0;
 	cpu_buffer->commit_overrun = 0;
 	cpu_buffer->overrun = 0;
-	cpu_buffer->entries = 0;
+	atomic_set(&cpu_buffer->entries, 0);
 
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
@@ -2813,7 +2813,7 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
 		/* Only count data entries */
 		if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 			continue;
-		cpu_buffer->entries--;
+		atomic_dec(&cpu_buffer->entries);
 	}
 	__raw_spin_unlock(&cpu_buffer->lock);
 }
-- 
1.6.2.1

-- 

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries
  2009-05-01  2:22 ` [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries Steven Rostedt
@ 2009-05-01  3:00   ` Andrew Morton
  2009-05-01  3:11     ` Steven Rostedt
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Morton @ 2009-05-01  3:00 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Ingo Molnar, Frederic Weisbecker

On Thu, 30 Apr 2009 22:22:11 -0400 Steven Rostedt <rostedt@goodmis.org> wrote:

> +unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
> +{
> +	struct ring_buffer_per_cpu *cpu_buffer;
> +	unsigned long ret;
> +
> +	if (!cpumask_test_cpu(cpu, buffer->cpumask))
> +		return 0;
> +
> +	cpu_buffer = buffer->buffers[cpu];
> +	ret = cpu_buffer->nmi_dropped;
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
> +
> +/**
> + * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
> + * @buffer: The ring buffer
> + * @cpu: The per CPU buffer to get the number of overruns from
> + */
> +unsigned long
> +ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
> +{
> +	struct ring_buffer_per_cpu *cpu_buffer;
> +	unsigned long ret;
> +
> +	if (!cpumask_test_cpu(cpu, buffer->cpumask))
> +		return 0;
> +
> +	cpu_buffer = buffer->buffers[cpu];
> +	ret = cpu_buffer->commit_overrun;
> +
> +	return ret;
> +}

hm.  Four functions in a row, all of which differ in but a single line.

unsigned long
ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
{
	return some_common_function(buffer, cpu)->commit_overrun;
}

?


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01  2:22 ` [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic Steven Rostedt
@ 2009-05-01  3:06   ` Andrew Morton
  2009-05-01  3:28     ` Steven Rostedt
  2009-05-01 11:50   ` Ingo Molnar
  1 sibling, 1 reply; 31+ messages in thread
From: Andrew Morton @ 2009-05-01  3:06 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Ingo Molnar, Frederic Weisbecker

On Thu, 30 Apr 2009 22:22:13 -0400 Steven Rostedt <rostedt@goodmis.org> wrote:

> @@ -405,7 +405,7 @@ struct ring_buffer_per_cpu {
>  	unsigned long			nmi_dropped;
>  	unsigned long			commit_overrun;
>  	unsigned long			overrun;
> -	unsigned long			entries;
> +	atomic_t			entries;

This switches `entries' from unsigned-64-bit to signed-32-bit.

The signedness thing is just an unfortunate thing with atomic_t and probably
doesn't matter.

The change in size might or might not be a bug - that's for the person
who didn't document the data structure to work out ;)




/*
 * head_page == tail_page && head == tail then buffer is empty.
 */
struct ring_buffer_per_cpu {

the comment refers to vaporfields?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/3] tracing: export stats of ring buffers to userspace
  2009-05-01  2:22 ` [PATCH 2/3] tracing: export stats of ring buffers to userspace Steven Rostedt
@ 2009-05-01  3:08   ` Frederic Weisbecker
  2009-05-01  3:23     ` Steven Rostedt
  0 siblings, 1 reply; 31+ messages in thread
From: Frederic Weisbecker @ 2009-05-01  3:08 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Ingo Molnar, Andrew Morton

On Thu, Apr 30, 2009 at 10:22:12PM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <srostedt@redhat.com>
> 
> This patch adds stats to the ftrace ring buffers:
> 
>  # cat /debugfs/tracing/per_cpu/cpu0/stats
>  entries: 42360
>  overrun: 30509326
>  commit overrun: 0
>  nmi dropped: 0
> 
> Where entries are the total number of data entries in the buffer.
> 
> overrun is the number of entries not consumed and were overwritten by
> the writer.
> 
> commit overrun is the number of entries dropped due to nested writers
> wrapping the buffer before the initial writer finished the commit.


I feel a bit confused with this one.
How such a thing can happen? The write page and the commit page
are not the same. So is that because we can have (ring-buffer inspires
all of us to try ascii-art):


          Write page                      Commit page (which becomes new write page) 
------------------------------------ -----------------
            |           |          | |               |
  Writer 1  | Writer 2  | Writer n | | Writer n + 1  | .....
  reserved  | reserved  | reserved | | reserved      |
-----------------------------------  ----------------
    |                                             ^
    |                                             |
    ---------------- Was supposed to commit here--|
 

I know this is silly, my picture seem to show a data copy whereas
the ring buffer deals with page pointers.
But the commit page on the ring buffer is a mystery for me.
Just because you haven't drawn it in ascii in your comments :)

Thanks.
Frederic.

> nmi dropped is the number of entries dropped due to the ring buffer
> lock being held when an nmi was going to write to the ring buffer.
> Note, this field will be meaningless and will go away when the ring
> buffer becomes lockless.
> 
> [ Impact: let userspace know what is happening in the ring buffers ]
> 
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
> ---
>  kernel/trace/trace.c |   42 ++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 42 insertions(+), 0 deletions(-)
> 
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index f5427e0..74df029 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -3595,6 +3595,45 @@ static const struct file_operations tracing_buffers_fops = {
>  	.llseek		= no_llseek,
>  };
>  
> +static ssize_t
> +tracing_stats_read(struct file *filp, char __user *ubuf,
> +		   size_t count, loff_t *ppos)
> +{
> +	unsigned long cpu = (unsigned long)filp->private_data;
> +	struct trace_array *tr = &global_trace;
> +	struct trace_seq *s;
> +	unsigned long cnt;
> +
> +	s = kmalloc(sizeof(*s), GFP_ATOMIC);
> +	if (!s)
> +		return ENOMEM;
> +
> +	trace_seq_init(s);
> +
> +	cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
> +	trace_seq_printf(s, "entries: %ld\n", cnt);
> +
> +	cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
> +	trace_seq_printf(s, "overrun: %ld\n", cnt);
> +
> +	cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
> +	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
> +
> +	cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
> +	trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
> +
> +	count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
> +
> +	kfree(s);
> +
> +	return count;
> +}
> +
> +static const struct file_operations tracing_stats_fops = {
> +	.open		= tracing_open_generic,
> +	.read		= tracing_stats_read,
> +};
> +
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  
>  int __weak ftrace_arch_read_dyn_info(char *buf, int size)
> @@ -3708,6 +3747,9 @@ static void tracing_init_debugfs_percpu(long cpu)
>  
>  	trace_create_file("trace_pipe_raw", 0444, d_cpu,
>  			(void *) cpu, &tracing_buffers_fops);
> +
> +	trace_create_file("stats", 0444, d_cpu,
> +			(void *) cpu, &tracing_stats_fops);
>  }
>  
>  #ifdef CONFIG_FTRACE_SELFTEST
> -- 
> 1.6.2.1
> 
> -- 


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries
  2009-05-01  3:00   ` Andrew Morton
@ 2009-05-01  3:11     ` Steven Rostedt
  2009-05-01  3:22       ` Andrew Morton
  0 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01  3:11 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Ingo Molnar, Frederic Weisbecker


On Thu, 30 Apr 2009, Andrew Morton wrote:

> On Thu, 30 Apr 2009 22:22:11 -0400 Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > +unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
> > +{
> > +	struct ring_buffer_per_cpu *cpu_buffer;
> > +	unsigned long ret;
> > +
> > +	if (!cpumask_test_cpu(cpu, buffer->cpumask))
> > +		return 0;
> > +
> > +	cpu_buffer = buffer->buffers[cpu];
> > +	ret = cpu_buffer->nmi_dropped;
> > +
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
> > +
> > +/**
> > + * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
> > + * @buffer: The ring buffer
> > + * @cpu: The per CPU buffer to get the number of overruns from
> > + */
> > +unsigned long
> > +ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
> > +{
> > +	struct ring_buffer_per_cpu *cpu_buffer;
> > +	unsigned long ret;
> > +
> > +	if (!cpumask_test_cpu(cpu, buffer->cpumask))
> > +		return 0;
> > +
> > +	cpu_buffer = buffer->buffers[cpu];
> > +	ret = cpu_buffer->commit_overrun;
> > +
> > +	return ret;
> > +}
> 
> hm.  Four functions in a row, all of which differ in but a single line.
> 
> unsigned long
> ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
> {
> 	return some_common_function(buffer, cpu)->commit_overrun;
> }
> 
> ?

But that actually takes thought. I like my cut and paste ;-)

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries
  2009-05-01  3:11     ` Steven Rostedt
@ 2009-05-01  3:22       ` Andrew Morton
  2009-05-01  3:33         ` Steven Rostedt
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Morton @ 2009-05-01  3:22 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Ingo Molnar, Frederic Weisbecker

On Thu, 30 Apr 2009 23:11:52 -0400 (EDT) Steven Rostedt <rostedt@goodmis.org> wrote:

> > hm.  Four functions in a row, all of which differ in but a single line.
> > 
> > unsigned long
> > ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
> > {
> > 	return some_common_function(buffer, cpu)->commit_overrun;
> > }
> > 
> > ?
> 
> But that actually takes thought. I like my cut and paste ;-)

Well, we all like your cut-n-paste, Steve.

Should you decide to retire the paste pot, and if you can bear to move
`struct ring_buffer' into a header, these four functions could be made
static inlines, with pleasant runtime results.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/3] tracing: export stats of ring buffers to userspace
  2009-05-01  3:08   ` Frederic Weisbecker
@ 2009-05-01  3:23     ` Steven Rostedt
  2009-05-01 12:43       ` Frederic Weisbecker
  0 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01  3:23 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: linux-kernel, Ingo Molnar, Andrew Morton


On Fri, 1 May 2009, Frederic Weisbecker wrote:

> On Thu, Apr 30, 2009 at 10:22:12PM -0400, Steven Rostedt wrote:
> > From: Steven Rostedt <srostedt@redhat.com>
> > 
> > This patch adds stats to the ftrace ring buffers:
> > 
> >  # cat /debugfs/tracing/per_cpu/cpu0/stats
> >  entries: 42360
> >  overrun: 30509326
> >  commit overrun: 0
> >  nmi dropped: 0
> > 
> > Where entries are the total number of data entries in the buffer.
> > 
> > overrun is the number of entries not consumed and were overwritten by
> > the writer.
> > 
> > commit overrun is the number of entries dropped due to nested writers
> > wrapping the buffer before the initial writer finished the commit.
> 
> 
> I feel a bit confused with this one.
> How such a thing can happen? The write page and the commit page
> are not the same. So is that because we can have (ring-buffer inspires
> all of us to try ascii-art):
> 
> 
>           Write page                      Commit page (which becomes new write page) 
> ------------------------------------ -----------------
>             |           |          | |               |
>   Writer 1  | Writer 2  | Writer n | | Writer n + 1  | .....
>   reserved  | reserved  | reserved | | reserved      |
> -----------------------------------  ----------------
>     |                                             ^
>     |                                             |
>     ---------------- Was supposed to commit here--|
>  
> 
> I know this is silly, my picture seem to show a data copy whereas
> the ring buffer deals with page pointers.
> But the commit page on the ring buffer is a mystery for me.
> Just because you haven't drawn it in ascii in your comments :)
> 

I have a bunch of ascii art that explains all this in my documentation 
that details the lockless version.

The commit page is the page that holds the last full commit.

 ring_buffer_unlock_commit()

On ring_buffer_lock_reserve() we reserve some data after the last commit.

            commit
               |
               V
    +---+    +---+    +---+    +---+
<---|   |--->|   |--->|   |--->|   |--->
--->|   |<---|   |<---|   |<---|   |<---
    +---+    +---+    +---+    +---+
               ^
               |
             tail (writer)

We do not disable interrupts (or softirqs) between 
ring_buffer_lock_reserve and ring_buffer_unlock_commit. If we get 
preempted by an interrupt or softirq, and it writes to the ring buffer, it 
will move the tail, but not the commit. Only the outer most writer (non 
nested) can do that:

            commit     
               |
               V
    +---+    +---+    +---+    +---+
<---|   |--->|   |--->|   |--->|   |--->
--->|   |<---|   |<---|   |<---|   |<---
    +---+    +---+    +---+    +---+
                        ^
                        |
                      tail (writer)


But lets say we are running the function graph tracer along with the event 
tracer. And to save space, we shrunk the ring buffer down to a few pages. 
(more than 2)  We are writing an event and get preempted by an interrupt 
followed by several softirqs, and these softirqs perform a lot of 
functions. It can push the tail all around the buffer:

            commit
               |
               V
    +---+    +---+    +---+    +---+
<---|   |--->|   |--->|   |--->|   |--->
--->|   |<---|   |<---|   |<---|   |<---
    +---+    +---+    +---+    +---+
      ^
      |
    tail (writer)


This happens before we even finish the original write. But the tail can 
not push the commit forward, because when we fall out of this stack of 
writers, that original writer is in the process of writing into the ring 
buffer. Thus we need to drop any more entries that want to push the tail 
pointer onto the commit pointer.

Thus, when this happens we record it with commit_overrun.

-- Steve




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01  3:06   ` Andrew Morton
@ 2009-05-01  3:28     ` Steven Rostedt
  0 siblings, 0 replies; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01  3:28 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Ingo Molnar, Frederic Weisbecker




On Thu, 30 Apr 2009, Andrew Morton wrote:

> On Thu, 30 Apr 2009 22:22:13 -0400 Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > @@ -405,7 +405,7 @@ struct ring_buffer_per_cpu {
> >  	unsigned long			nmi_dropped;
> >  	unsigned long			commit_overrun;
> >  	unsigned long			overrun;
> > -	unsigned long			entries;
> > +	atomic_t			entries;
> 
> This switches `entries' from unsigned-64-bit to signed-32-bit.

Good point. Although I doubt we will have more than 2 billion entries in 
one cpu buffer. But it can happen. I'll change that to 64 bit atomic.


> 
> The signedness thing is just an unfortunate thing with atomic_t and probably
> doesn't matter.
> 
> The change in size might or might not be a bug - that's for the person
> who didn't document the data structure to work out ;)
> 
> 
> 
> 
> /*
>  * head_page == tail_page && head == tail then buffer is empty.
>  */
> struct ring_buffer_per_cpu {
> 
> the comment refers to vaporfields?
> 

Ug, the head_page and tail_page are in this struct. But the "head" and 
"tail" are with in the "head_page" and "tail_page" respectively.

-- Steve




^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries
  2009-05-01  3:22       ` Andrew Morton
@ 2009-05-01  3:33         ` Steven Rostedt
  2009-05-01  3:38           ` Andrew Morton
  0 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01  3:33 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Ingo Molnar, Frederic Weisbecker


On Thu, 30 Apr 2009, Andrew Morton wrote:

> On Thu, 30 Apr 2009 23:11:52 -0400 (EDT) Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > > hm.  Four functions in a row, all of which differ in but a single line.
> > > 
> > > unsigned long
> > > ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
> > > {
> > > 	return some_common_function(buffer, cpu)->commit_overrun;
> > > }
> > > 
> > > ?
> > 
> > But that actually takes thought. I like my cut and paste ;-)
> 
> Well, we all like your cut-n-paste, Steve.

It helps with my commit count.

> 
> Should you decide to retire the paste pot, and if you can bear to move
> `struct ring_buffer' into a header, these four functions could be made
> static inlines, with pleasant runtime results.

Well, the only users of these functions are those that output to user 
land. The latency format and the stat file. static inlines wont help much 
with performance there.

But I do think it is cleaner to consolidate them.

Thanks,

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries
  2009-05-01  3:33         ` Steven Rostedt
@ 2009-05-01  3:38           ` Andrew Morton
  2009-05-01  3:55             ` Steven Rostedt
  0 siblings, 1 reply; 31+ messages in thread
From: Andrew Morton @ 2009-05-01  3:38 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Ingo Molnar, Frederic Weisbecker

On Thu, 30 Apr 2009 23:33:31 -0400 (EDT) Steven Rostedt <rostedt@goodmis.org> wrote:

> > Should you decide to retire the paste pot, and if you can bear to move
> > `struct ring_buffer' into a header, these four functions could be made
> > static inlines, with pleasant runtime results.
> 
> Well, the only users of these functions are those that output to user 
> land. The latency format and the stat file.

OK, that's slowpath.  They were exported to modules - are these files a
per-tracer thing?


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries
  2009-05-01  3:38           ` Andrew Morton
@ 2009-05-01  3:55             ` Steven Rostedt
  0 siblings, 0 replies; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01  3:55 UTC (permalink / raw)
  To: Andrew Morton; +Cc: LKML, Ingo Molnar, Frederic Weisbecker, Robert Richter


On Thu, 30 Apr 2009, Andrew Morton wrote:

> On Thu, 30 Apr 2009 23:33:31 -0400 (EDT) Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > > Should you decide to retire the paste pot, and if you can bear to move
> > > `struct ring_buffer' into a header, these four functions could be made
> > > static inlines, with pleasant runtime results.
> > 
> > Well, the only users of these functions are those that output to user 
> > land. The latency format and the stat file.
> 
> OK, that's slowpath.  They were exported to modules - are these files a
> per-tracer thing?

A ring buffer can be allocated per tracer (although ftrace does not -yet- 
do this). Some of these are for per_cpu parts of the ring buffer. The ring 
buffer has a buffer per cpu.

As for the EXPORT_SYMBOLs. See this commit (and I added Robert to the Cc)

commit c4f50183f90fb1fd99aa5941f01b90cd1b882d2e
Author: Robert Richter <robert.richter@amd.com>
Date:   Thu Dec 11 16:49:22 2008 +0100

    ring_buffer: adding EXPORT_SYMBOLs
    
    I added EXPORT_SYMBOL_GPLs for all functions part of the API
    (ring_buffer.h). This is required since oprofile is using the ring
    buffer and the compilation as modules would fail otherwise.
    
    Signed-off-by: Robert Richter <robert.richter@amd.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>


So it looks like it was just a "give modules all access to the ring 
buffer" commit.

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01  2:22 ` [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic Steven Rostedt
  2009-05-01  3:06   ` Andrew Morton
@ 2009-05-01 11:50   ` Ingo Molnar
  2009-05-01 13:42     ` Frederic Weisbecker
  2009-05-01 14:23     ` Steven Rostedt
  1 sibling, 2 replies; 31+ messages in thread
From: Ingo Molnar @ 2009-05-01 11:50 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


* Steven Rostedt <rostedt@goodmis.org> wrote:

> From: Steven Rostedt <srostedt@redhat.com>
> 
> The entries counter in cpu buffer is not atomic. Although it only 
> gets updated by a single CPU, interrupts may come in and update 
> the counter too. This would cause missing entries to be added.

> -	unsigned long			entries;
> +	atomic_t			entries;

Hm, that's not really good as atomics can be rather expensive and 
this is the fastpath.

This is the umpteenth time or so that the fact that we do not disable 
irqs while generating trace entries bites us in one way or another. 
IRQs can come in and confuse function trace output, etc. etc.

Please lets do what i suggested a long time ago: disable irqs _once_ 
in any trace point and run atomically from that point on, and enable 
them once, at the end.

The cost is very small and it turns into a win immediately by 
elimination of a _single_ atomic instruction. (even on Nehalem they 
cost 20 cycles. More on older CPUs.) We can drop the preempt-count 
disable/enable as well and a lot of racy code as well. Please.

	Ingo

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/3] tracing: export stats of ring buffers to userspace
  2009-05-01  3:23     ` Steven Rostedt
@ 2009-05-01 12:43       ` Frederic Weisbecker
  0 siblings, 0 replies; 31+ messages in thread
From: Frederic Weisbecker @ 2009-05-01 12:43 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Ingo Molnar, Andrew Morton

On Thu, Apr 30, 2009 at 11:23:52PM -0400, Steven Rostedt wrote:
> 
> On Fri, 1 May 2009, Frederic Weisbecker wrote:
> 
> > On Thu, Apr 30, 2009 at 10:22:12PM -0400, Steven Rostedt wrote:
> > > From: Steven Rostedt <srostedt@redhat.com>
> > > 
> > > This patch adds stats to the ftrace ring buffers:
> > > 
> > >  # cat /debugfs/tracing/per_cpu/cpu0/stats
> > >  entries: 42360
> > >  overrun: 30509326
> > >  commit overrun: 0
> > >  nmi dropped: 0
> > > 
> > > Where entries are the total number of data entries in the buffer.
> > > 
> > > overrun is the number of entries not consumed and were overwritten by
> > > the writer.
> > > 
> > > commit overrun is the number of entries dropped due to nested writers
> > > wrapping the buffer before the initial writer finished the commit.
> > 
> > 
> > I feel a bit confused with this one.
> > How such a thing can happen? The write page and the commit page
> > are not the same. So is that because we can have (ring-buffer inspires
> > all of us to try ascii-art):
> > 
> > 
> >           Write page                      Commit page (which becomes new write page) 
> > ------------------------------------ -----------------
> >             |           |          | |               |
> >   Writer 1  | Writer 2  | Writer n | | Writer n + 1  | .....
> >   reserved  | reserved  | reserved | | reserved      |
> > -----------------------------------  ----------------
> >     |                                             ^
> >     |                                             |
> >     ---------------- Was supposed to commit here--|
> >  
> > 
> > I know this is silly, my picture seem to show a data copy whereas
> > the ring buffer deals with page pointers.
> > But the commit page on the ring buffer is a mistery for me.
> > Just because you haven't drawn in in ascii in your comments :)
> > 
> 
> I have a bunch of ascii art that explains all this in my documentation 
> that details the lockless version.
> 
> The commit page is the page that holds the last full commit.
> 
>  ring_buffer_unlock_commit()
> 
> On ring_buffer_lock_reserve() we reserve some data after the last commit.
> 
>             commit
>                |
>                V
>     +---+    +---+    +---+    +---+
> <---|   |--->|   |--->|   |--->|   |--->
> --->|   |<---|   |<---|   |<---|   |<---
>     +---+    +---+    +---+    +---+
>                ^
>                |
>              tail (writer)
> 
> We do not disable interrupts (or softirqs) between 
> ring_buffer_lock_reserve and ring_buffer_unlock_commit. If we get 
> preempted by an interrupt or softirq, and it writes to the ring buffer, it 
> will move the tail, but not the commit. Only the outer most writer (non 
> nested) can do that:
> 
>             commit     
>                |
>                V
>     +---+    +---+    +---+    +---+
> <---|   |--->|   |--->|   |--->|   |--->
> --->|   |<---|   |<---|   |<---|   |<---
>     +---+    +---+    +---+    +---+
>                         ^
>                         |
>                       tail (writer)



Aah, now I understand what rb_set_commit_to_write() does


 
> 
> But lets say we are running the function graph tracer along with the event 
> tracer. And to save space, we shrunk the ring buffer down to a few pages. 
> (more than 2)  We are writing an event and get preempted by an interrupt 
> followed by several softirqs, and these softirqs perform a lot of 
> functions. It can push the tail all around the buffer:
> 
>             commit
>                |
>                V
>     +---+    +---+    +---+    +---+
> <---|   |--->|   |--->|   |--->|   |--->
> --->|   |<---|   |<---|   |<---|   |<---
>     +---+    +---+    +---+    +---+
>       ^
>       |
>     tail (writer)
> 
> 
> This happens before we even finish the original write. But the tail can 
> not push the commit forward, because when we fall out of this stack of 
> writers, that original writer is in the process of writing into the ring 
> buffer. Thus we need to drop any more entries that want to push the tail 
> pointer onto the commit pointer.
> 
> Thus, when this happens we record it with commit_overrun.
> 
> -- Steve


Ok thanks for these explanations!

I hope your lockless ring buffer will be posted soon (any news about
the patent?) :-)



^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 11:50   ` Ingo Molnar
@ 2009-05-01 13:42     ` Frederic Weisbecker
  2009-05-01 14:28       ` Steven Rostedt
  2009-05-01 14:23     ` Steven Rostedt
  1 sibling, 1 reply; 31+ messages in thread
From: Frederic Weisbecker @ 2009-05-01 13:42 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Steven Rostedt, linux-kernel, Andrew Morton

On Fri, May 01, 2009 at 01:50:47PM +0200, Ingo Molnar wrote:
> 
> * Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > From: Steven Rostedt <srostedt@redhat.com>
> > 
> > The entries counter in cpu buffer is not atomic. Although it only 
> > gets updated by a single CPU, interrupts may come in and update 
> > the counter too. This would cause missing entries to be added.
> 
> > -	unsigned long			entries;
> > +	atomic_t			entries;
> 
> Hm, that's not really good as atomics can be rather expensive and 
> this is the fastpath.
> 
> This is the upteenth time or so that the fact that we do not disable 
> irqs while generating trace entries bites us in one way or another. 
> IRQs can come in and confuse function trace output, etc. etc.
> 
> Please lets do what i suggested a long time ago: disable irqs _once_ 
> in any trace point and run atomically from that point on, and enable 
> them once, at the end.
> 
> The cost is very small and it turns into a win immediately by 
> elimination of a _single_ atomic instruction. (even on Nehalem they 
> cost 20 cycles. More on older CPUs.) We can drop the preempt-count 
> disable/enable as well and a lot of racy code as well. Please.
> 
> 	Ingo


I also suspect one other good effect on doing this.

As you know, between a lock_reserve and a discard, several interrupts
can trigger some traces. It means that if some rooms have already been
reserved, the discard will really create a discarded entry and we
can't reuse it.

For example in the case of filters with lock tracing, we rapidly run
into entries overridden, making the lock events tracing about useless
because we rapidly lose everything.

At least that's an effect I observed. I'm not sure the discard is the
real cause but it seems to make sense.

That's a pity because believe me it is very useful to hunt a softlockup.

Of course it doesn't prevent from NMI tempest, but we already have
protections for that.

Frederic.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 11:50   ` Ingo Molnar
  2009-05-01 13:42     ` Frederic Weisbecker
@ 2009-05-01 14:23     ` Steven Rostedt
  2009-05-01 16:14       ` Steven Rostedt
  1 sibling, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01 14:23 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


On Fri, 1 May 2009, Ingo Molnar wrote:

> 
> * Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > From: Steven Rostedt <srostedt@redhat.com>
> > 
> > The entries counter in cpu buffer is not atomic. Although it only 
> > gets updated by a single CPU, interrupts may come in and update 
> > the counter too. This would cause missing entries to be added.
> 
> > -	unsigned long			entries;
> > +	atomic_t			entries;
> 
> Hm, that's not really good as atomics can be rather expensive and 
> this is the fastpath.

Actually, it could be local_t. I used that in a lot of the other places.
The race is with one CPU, not other CPUs, and on archs like x86 there
is no cost for the "LOCK".

> 
> This is the upteenth time or so that the fact that we do not disable 
> irqs while generating trace entries bites us in one way or another. 
> IRQs can come in and confuse function trace output, etc. etc.

Note, this race is on a simple counter used for stats. It never was
exposed to user land except in the latency output, and that tracer 
disables interrupts anyway.

> 
> Please lets do what i suggested a long time ago: disable irqs _once_ 
> in any trace point and run atomically from that point on, and enable 
> them once, at the end.
> 
> The cost is very small and it turns into a win immediately by 
> elimination of a _single_ atomic instruction. (even on Nehalem they 
> cost 20 cycles. More on older CPUs.) We can drop the preempt-count 
> disable/enable as well and a lot of racy code as well. Please.

If we punt and simply disable interrupts in the ring buffer, I would then 
have to disable all tracing of NMIs. Yes it will make the code simpler, 
but the new code would also have:

ring_buffer_lock_reserve() {

	if (in_nmi())
		return NULL;

If that is acceptable, then fine. I'll make the change.

I will also throw away the lockless ring buffer since it would no longer 
be needed.

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 13:42     ` Frederic Weisbecker
@ 2009-05-01 14:28       ` Steven Rostedt
  2009-05-01 22:10         ` Frederic Weisbecker
  0 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01 14:28 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Ingo Molnar, linux-kernel, Andrew Morton




On Fri, 1 May 2009, Frederic Weisbecker wrote:

> On Fri, May 01, 2009 at 01:50:47PM +0200, Ingo Molnar wrote:
> > 
> > * Steven Rostedt <rostedt@goodmis.org> wrote:
> > 
> > > From: Steven Rostedt <srostedt@redhat.com>
> > > 
> > > The entries counter in cpu buffer is not atomic. Although it only 
> > > gets updated by a single CPU, interrupts may come in and update 
> > > the counter too. This would cause missing entries to be added.
> > 
> > > -	unsigned long			entries;
> > > +	atomic_t			entries;
> > 
> > Hm, that's not really good as atomics can be rather expensive and 
> > this is the fastpath.
> > 
> > This is the upteenth time or so that the fact that we do not disable 
> > irqs while generating trace entries bites us in one way or another. 
> > IRQs can come in and confuse function trace output, etc. etc.
> > 
> > Please lets do what i suggested a long time ago: disable irqs _once_ 
> > in any trace point and run atomically from that point on, and enable 
> > them once, at the end.
> > 
> > The cost is very small and it turns into a win immediately by 
> > elimination of a _single_ atomic instruction. (even on Nehalem they 
> > cost 20 cycles. More on older CPUs.) We can drop the preempt-count 
> > disable/enable as well and a lot of racy code as well. Please.
> > 
> > 	Ingo
> 
> 
> I also suspect one other good effect on doing this.
> 
> As you know, between a lock_reserve and a discard, several interrupts
> can trigger some traces. It means that if some rooms have already been
> reserved, the discard will really create a discarded entry and we
> can't reuse it.
> 
> For example in the case of filters with lock tracing, we rapidly run
> into entries overriden, making the lock events tracing about useless
> because we rapidly lose everything.

If you want, we can disable interrupts from the event tracer, not the ring 
buffer.

We would have to go back to the original ring buffer code that passed in 
flags.

> 
> At least that's an effect I observed. I'm not sure the discard is the
> real cause but it seems to make sense.
> 
> That's a pity because believe me it is very useful to hunt a softlockup.
> 
> Of course it doesn't prevent from NMI tempest, but we already have
> protections for that.

If we do not allow interrupts to be traced, we can not allow NMIs either. 
If we do not let the ring buffer be re-entrant, then we will not be able 
to trace any NMI (to be safe).

Going this route, there would be no need to make a lockless ring buffer 
either.

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 14:23     ` Steven Rostedt
@ 2009-05-01 16:14       ` Steven Rostedt
  2009-05-01 16:20         ` Ingo Molnar
  0 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01 16:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


On Fri, 1 May 2009, Steven Rostedt wrote:

> 
> On Fri, 1 May 2009, Ingo Molnar wrote:
> 
> > 
> > * Steven Rostedt <rostedt@goodmis.org> wrote:
> > 
> > > From: Steven Rostedt <srostedt@redhat.com>
> > > 
> > > The entries counter in cpu buffer is not atomic. Although it only 
> > > gets updated by a single CPU, interrupts may come in and update 
> > > the counter too. This would cause missing entries to be added.
> > 
> > > -	unsigned long			entries;
> > > +	atomic_t			entries;
> > 
> > Hm, that's not really good as atomics can be rather expensive and 
> > this is the fastpath.
> 
> Actually, it could be local_t. I used that in a lot of the other places.
> The race is with on CPU not other CPUs, and on archs like x86 there
> is not cost of the "LOCK".

Ug, it must be atomic_t. It is also modified by the reader. Thus it is not 
only a race with a single CPU but also multiple CPUs.

This means that interrupts disabled is not the only protection it needs. 
It must either be an atomic, or protected by a spinlock.

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 16:14       ` Steven Rostedt
@ 2009-05-01 16:20         ` Ingo Molnar
  2009-05-01 16:31           ` Steven Rostedt
  0 siblings, 1 reply; 31+ messages in thread
From: Ingo Molnar @ 2009-05-01 16:20 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


* Steven Rostedt <rostedt@goodmis.org> wrote:

> 
> On Fri, 1 May 2009, Steven Rostedt wrote:
> 
> > 
> > On Fri, 1 May 2009, Ingo Molnar wrote:
> > 
> > > 
> > > * Steven Rostedt <rostedt@goodmis.org> wrote:
> > > 
> > > > From: Steven Rostedt <srostedt@redhat.com>
> > > > 
> > > > The entries counter in cpu buffer is not atomic. Although it only 
> > > > gets updated by a single CPU, interrupts may come in and update 
> > > > the counter too. This would cause missing entries to be added.
> > > 
> > > > -	unsigned long			entries;
> > > > +	atomic_t			entries;
> > > 
> > > Hm, that's not really good as atomics can be rather expensive and 
> > > this is the fastpath.
> > 
> > Actually, it could be local_t. I used that in a lot of the other places.
> > The race is with on CPU not other CPUs, and on archs like x86 there
> > is not cost of the "LOCK".
> 
> Ug, it must be atomic_t. It is also modified by the reader. Thus 
> it is not only a race with a single CPU but also multiple CPUs.
> 
> This means that interrupts disabled is not the only proctection it 
> needs. It must either be an atomic, or protected by a spinlock.

Trace buffers are rather fundamentally per cpu. Where's the problem?

	Ingo

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 16:20         ` Ingo Molnar
@ 2009-05-01 16:31           ` Steven Rostedt
  2009-05-01 16:32             ` Steven Rostedt
  2009-05-01 16:52             ` Ingo Molnar
  0 siblings, 2 replies; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01 16:31 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


On Fri, 1 May 2009, Ingo Molnar wrote:
> > > > > the counter too. This would cause missing entries to be added.
> > > > 
> > > > > -	unsigned long			entries;
> > > > > +	atomic_t			entries;
> > > > 
> > > > Hm, that's not really good as atomics can be rather expensive and 
> > > > this is the fastpath.
> > > 
> > > Actually, it could be local_t. I used that in a lot of the other places.
> > > The race is with on CPU not other CPUs, and on archs like x86 there
> > > is not cost of the "LOCK".
> > 
> > Ug, it must be atomic_t. It is also modified by the reader. Thus 
> > it is not only a race with a single CPU but also multiple CPUs.
> > 
> > This means that interrupts disabled is not the only proctection it 
> > needs. It must either be an atomic, or protected by a spinlock.
> 
> Trace buffers are rather fundamentally per cpu. Where's the problem?

The entries keeps track of the number of entries in the buffer. A writer 
(producer) adds to the counter and readers (consumers) subtract from them. 
A writer can subtract them if it overwrites a page before the producer 
consumes it.

Only the writers are pinned to a CPU, the readers happen on any CPU.

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 16:31           ` Steven Rostedt
@ 2009-05-01 16:32             ` Steven Rostedt
  2009-05-01 16:52             ` Ingo Molnar
  1 sibling, 0 replies; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01 16:32 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


On Fri, 1 May 2009, Steven Rostedt wrote:
> A writer can subtract them if it overwrites a page before the producer 
> consumes it.

s/producer/consumer/

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 16:31           ` Steven Rostedt
  2009-05-01 16:32             ` Steven Rostedt
@ 2009-05-01 16:52             ` Ingo Molnar
  2009-05-01 17:11               ` Steven Rostedt
  1 sibling, 1 reply; 31+ messages in thread
From: Ingo Molnar @ 2009-05-01 16:52 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


* Steven Rostedt <rostedt@goodmis.org> wrote:

> 
> On Fri, 1 May 2009, Ingo Molnar wrote:
> > > > > > the counter too. This would cause missing entries to be added.
> > > > > 
> > > > > > -	unsigned long			entries;
> > > > > > +	atomic_t			entries;
> > > > > 
> > > > > Hm, that's not really good as atomics can be rather expensive and 
> > > > > this is the fastpath.
> > > > 
> > > > Actually, it could be local_t. I used that in a lot of the other places.
> > > > The race is with on CPU not other CPUs, and on archs like x86 there
> > > > is not cost of the "LOCK".
> > > 
> > > Ug, it must be atomic_t. It is also modified by the reader. Thus 
> > > it is not only a race with a single CPU but also multiple CPUs.
> > > 
> > > This means that interrupts disabled is not the only proctection it 
> > > needs. It must either be an atomic, or protected by a spinlock.
> > 
> > Trace buffers are rather fundamentally per cpu. Where's the 
> > problem?
> 
> The entries keeps track of the number of entries in the buffer. A 
> writer (producer) adds to the counter and readers (consumers) 
> subtract from them. A writer can subtract them if it overwrites a 
> page before the producer consumes it.
> 
> Only the writers are pinned to a CPU, the readers happen on any 
> CPU.

But that does not require atomicity. It requires careful use of 
barriers, but otherwise atomicity is not needed. Update of machine 
word variables (if they are aligned to a machine word) is guaranteed 
to be atomic, even without atomic_t overhead.

	Ingo

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 16:52             ` Ingo Molnar
@ 2009-05-01 17:11               ` Steven Rostedt
  2009-05-01 17:14                 ` Ingo Molnar
  0 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01 17:11 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


On Fri, 1 May 2009, Ingo Molnar wrote:
> > The entries keeps track of the number of entries in the buffer. A 
> > writer (producer) adds to the counter and readers (consumers) 
> > subtract from them. A writer can subtract them if it overwrites a 
> > page before the producer consumes it.
> > 
> > Only the writers are pinned to a CPU, the readers happen on any 
> > CPU.
> 
> But that does not require atomicity. It requires careful use of 
> barriers, but otherwise atomicity is not needed. Update of machine 
> word variables (if they are aligned to a machine word) is guaranteed 
> to be atomic, even without atomic_t overhead.

I'm confused :-/ This throws out all that I learned in multi threaded 
programming.

If I have a shared variable used by two threads, the adding and 
subtracting of that variable does not need to be atomic?

        CPU0                 CPU1
        ----                 ----
	load A               load A
	sub  1, A            sub 1, A
	store A              store A

can work??

BTW, how expensive is a LOCK operation on a cache line that is not 
shared by other CPUs?  As long as a reader is not running, the updating of 
the variable is not shared.

Also, if this becomes a problem, I can just nuke the field. I like knowing 
the number of entries in the buffer, as well as the number of overruns. 
But these are more for debugging purposes. The overruns should probably be 
kept, because that is the only way a tracer can know if an overrun 
occurred. The number of entries in the buffer is not as important.

-- Steve

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 17:11               ` Steven Rostedt
@ 2009-05-01 17:14                 ` Ingo Molnar
  2009-05-01 17:36                   ` Steven Rostedt
  0 siblings, 1 reply; 31+ messages in thread
From: Ingo Molnar @ 2009-05-01 17:14 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


* Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 1 May 2009, Ingo Molnar wrote:
> > > The entries keeps track of the number of entries in the buffer. A 
> > > writer (producer) adds to the counter and readers (consumers) 
> > > subtract from them. A writer can subtract them if it overwrites a 
> > > page before the producer consumes it.
> > > 
> > > Only the writers are pinned to a CPU, the readers happen on any 
> > > CPU.
> > 
> > But that does not require atomicity. It requires careful use of 
> > barriers, but otherwise atomicity is not needed. Update of machine 
> > word variables (if they are aligned to a machine word) is guaranteed 
> > to be atomic, even without atomic_t overhead.
> 
> I'm confused :-/ This throws out all that I learned in multi threaded 
> programming.
> 
> If I have a shared variable used by two threads, the adding and 
> subtracting of that variable does not need to be atomic?
> 
>         CPU0                 CPU1
>         ----                 ----
> 	load A               load A
> 	sub  1, A            sub 1, A
> 	store A              store A
> 
> can work??

no, that wont work. But as long as there's just a single CPU that is 
a _writer_ (does stores), it can be observed in an atomic/coherent 
manner, without the use of atomics.

	Ingo

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 17:14                 ` Ingo Molnar
@ 2009-05-01 17:36                   ` Steven Rostedt
  2009-05-01 17:42                     ` Ingo Molnar
  0 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01 17:36 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


On Fri, 1 May 2009, Ingo Molnar wrote:

> 
> * Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > On Fri, 1 May 2009, Ingo Molnar wrote:
> > > > The entries keeps track of the number of entries in the buffer. A 
> > > > writer (producer) adds to the counter and readers (consumers) 
> > > > subtract from them. A writer can subtract them if it overwrites a 
> > > > page before the producer consumes it.
> > > > 
> > > > Only the writers are pinned to a CPU, the readers happen on any 
> > > > CPU.
> > > 
> > > But that does not require atomicity. It requires careful use of 
> > > barriers, but otherwise atomicity is not needed. Update of machine 
> > > word variables (if they are aligned to a machine word) is guaranteed 
> > > to be atomic, even without atomic_t overhead.
> > 
> > I'm confused :-/ This throws out all that I learned in multi threaded 
> > programming.
> > 
> > If I have a shared variable used by two threads, the adding and 
> > subtracting of that variable does not need to be atomic?
> > 
> >         CPU0                 CPU1
> >         ----                 ----
> > 	load A               load A
> > 	sub  1, A            sub 1, A
> > 	store A              store A
> > 
> > can work??
> 
> no, that wont work. But as long as there's just a single CPU that is 
> a _writer_ (does stores), it can be observed in an atomic/coherent 
> manner, without the use of atomics.

Ah, maybe there's confusion in my explanation. When I talk about writers 
and readers, I'm talking about those writers into the ring buffer and 
readers from the ring buffer. But both writers and readers write to the 
entries counter. Readers subtract and writers add. But writers can also 
subtract on overruns.

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 17:36                   ` Steven Rostedt
@ 2009-05-01 17:42                     ` Ingo Molnar
  2009-05-01 17:56                       ` Steven Rostedt
  0 siblings, 1 reply; 31+ messages in thread
From: Ingo Molnar @ 2009-05-01 17:42 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


* Steven Rostedt <rostedt@goodmis.org> wrote:

> 
> On Fri, 1 May 2009, Ingo Molnar wrote:
> 
> > 
> > * Steven Rostedt <rostedt@goodmis.org> wrote:
> > 
> > > On Fri, 1 May 2009, Ingo Molnar wrote:
> > > > > The entries keeps track of the number of entries in the buffer. A 
> > > > > writer (producer) adds to the counter and readers (consumers) 
> > > > > subtract from them. A writer can subtract them if it overwrites a 
> > > > > page before the producer consumes it.
> > > > > 
> > > > > Only the writers are pinned to a CPU, the readers happen on any 
> > > > > CPU.
> > > > 
> > > > But that does not require atomicity. It requires careful use of 
> > > > barriers, but otherwise atomicity is not needed. Update of machine 
> > > > word variables (if they are aligned to a machine word) is guaranteed 
> > > > to be atomic, even without atomic_t overhead.
> > > 
> > > I'm confused :-/ This throws out all that I learned in multi threaded 
> > > programming.
> > > 
> > > If I have a shared variable used by two threads, the adding and 
> > > subtracting of that variable does not need to be atomic?
> > > 
> > >         CPU0                 CPU1
> > >         ----                 ----
> > > 	load A               load A
> > > 	sub  1, A            sub 1, A
> > > 	store A              store A
> > > 
> > > can work??
> > 
> > no, that wont work. But as long as there's just a single CPU that is 
> > a _writer_ (does stores), it can be observed in an atomic/coherent 
> > manner, without the use of atomics.
> 
> Ah, maybe there's confusion in my explanation. When I talk about 
> writers and readers, I'm talking about those writers into the ring 
> buffer and readers from the ring buffer. But both writers and 
> readers write to the entries counter. Readers subtract and writers 
> add. But writers can also subtract on overruns.

a solution for that would be to split it into two counts - for both 
sides. Or to eliminate it if possible. We _really_ need to make the 
ring-buffer _much_ cheaper than it is today.
	Ingo

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 17:42                     ` Ingo Molnar
@ 2009-05-01 17:56                       ` Steven Rostedt
  2009-05-01 21:17                         ` Steven Rostedt
  0 siblings, 1 reply; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01 17:56 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


On Fri, 1 May 2009, Ingo Molnar wrote:
> > 
> > Ah, maybe there's confusion in my explanation. When I talk about 
> > writers and readers, I'm talking about those writers into the ring 
> > buffer and readers from the ring buffer. But both writers and 
> > readers write to the entries counter. Readers subtract and writers 
> > add. But writers can also subtract on overruns.
> 
> a solution for that would be to split it into two counts - for both 
> sides. Or to eliminate it if possible. We _really_ need to make the 
> ring-buffer _much_ cheaper than it is today.

I was thinking the same thing :-)

Actually we would have three counters. All incremental.

	entries
	overruns
	read

The writer when adding an entry would increment entries

The writer when overwriting will increment overruns

The reader would increment read.

We could even make it 64 bits (or more) on all archs. Heck, make it two 
longs (128 bits for 64 bit archs). Since we only need to worry about 
interrupts, we only increment the second counter if we wrapped the first. 
I doubt a single interrupt (or many) could wrap the first counter. If it 
could, we have more to worry about than counters.

Thus the number of entries in the buffer would be:

	(entries - overruns) - read

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 17:56                       ` Steven Rostedt
@ 2009-05-01 21:17                         ` Steven Rostedt
  0 siblings, 0 replies; 31+ messages in thread
From: Steven Rostedt @ 2009-05-01 21:17 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Andrew Morton, Frederic Weisbecker


On Fri, 1 May 2009, Steven Rostedt wrote:

> 
> On Fri, 1 May 2009, Ingo Molnar wrote:
> > > 
> > > Ah, maybe there's confusion in my explanation. When I talk about 
> > > writers and readers, I'm talking about those writers into the ring 
> > > buffer and readers from the ring buffer. But both writers and 
> > > readers write to the entries counter. Readers subtract and writers 
> > > add. But writers can also subtract on overruns.
> > 
> > a solution for that would be to split it into two counts - for both 
> > sides. Or to eliminate it if possible. We _really_ need to make the 
> > ring-buffer _much_ cheaper than it is today.
> 
> I was thinking the same thing :-)
> 
> Actually we would have three counters. All incremental.
> 
> 	entries
> 	overruns
> 	read
> 
> The writer when adding an entry would increment entries
> 
> The writer when overwriting will increment overruns
> 
> The reader would increment read.
> 
> We could even make it 64 bits (or more) on all archs. Heck, make it two 
> longs (128 bits for 64 bit archs). Since we only need to worry about 

Actually this would work without needing the double numbers.


> interrupts, we only increment the second counter if we wrapped the first. 
> I doubt a single interrupt (or many) could wrap the first counter. If it 
> could, we have more to worry about than counters.
> 
> Thus the number of entries in the buffer would be:
> 
> 	(entries - overruns) - read

The above should be the same as:

   ((entries & SOME_MASK) - (overruns & SOME_MASK)) - (read & SOME_MASK)

As long as SOME_MASK covers more bits than the final result. Since we are 
interested in the number of entries in the buffer, as long as we don't 
have more entries than word size we should be fine. And if we did have 
more, then the previous solution was wrong too.

-- Steve


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic
  2009-05-01 14:28       ` Steven Rostedt
@ 2009-05-01 22:10         ` Frederic Weisbecker
  0 siblings, 0 replies; 31+ messages in thread
From: Frederic Weisbecker @ 2009-05-01 22:10 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, linux-kernel, Andrew Morton

On Fri, May 01, 2009 at 10:28:13AM -0400, Steven Rostedt wrote:
> 
> 
> 
> On Fri, 1 May 2009, Frederic Weisbecker wrote:
> 
> > On Fri, May 01, 2009 at 01:50:47PM +0200, Ingo Molnar wrote:
> > > 
> > > * Steven Rostedt <rostedt@goodmis.org> wrote:
> > > 
> > > > From: Steven Rostedt <srostedt@redhat.com>
> > > > 
> > > > The entries counter in cpu buffer is not atomic. Although it only 
> > > > gets updated by a single CPU, interrupts may come in and update 
> > > > the counter too. This would cause missing entries to be added.
> > > 
> > > > -	unsigned long			entries;
> > > > +	atomic_t			entries;
> > > 
> > > Hm, that's not really good as atomics can be rather expensive and 
> > > this is the fastpath.
> > > 
> > > This is the upteenth time or so that the fact that we do not disable 
> > > irqs while generating trace entries bites us in one way or another. 
> > > IRQs can come in and confuse function trace output, etc. etc.
> > > 
> > > Please lets do what i suggested a long time ago: disable irqs _once_ 
> > > in any trace point and run atomically from that point on, and enable 
> > > them once, at the end.
> > > 
> > > The cost is very small and it turns into a win immediately by 
> > > elimination of a _single_ atomic instruction. (even on Nehalem they 
> > > cost 20 cycles. More on older CPUs.) We can drop the preempt-count 
> > > disable/enable as well and a lot of racy code as well. Please.
> > > 
> > > 	Ingo
> > 
> > 
> > I also suspect one other good effect on doing this.
> > 
> > As you know, between a lock_reserve and a discard, several interrupts
> > can trigger some traces. It means that if some rooms have already been
> > reserved, the discard will really create a discarded entry and we
> > can't reuse it.
> > 
> > For example in the case of filters with lock tracing, we rapidly run
into entries overridden, making the lock events tracing about useless
> > because we rapidly lose everything.
> 
> If you want, we can disable interrupts from the event tracer, not the ring 
> buffer.
> 
> We would have to go back to the original ring buffer code that passed in 
> flags.



Or may be just copy the entry on the stack and filter on it,
then only reserve if it is eligible.


 
> > 
> > At least that's an effect I observed. I'm not sure the discard is the
> > real cause but it seems to make sense.
> > 
> > That's a pity because believe me it is very useful to hunt a softlockup.
> > 
> > Of course it doesn't prevent from NMI tempest, but we already have
> > protections for that.
> 
> If we do not allow interrupts to be traced, we can not allow NMIs either. 
> If we do not let the ring buffer be re-entrant, then we will not be able 
> to trace any NMI (to be safe).
> 
> Going this route, there would be no need to make a lockless ring buffer 
> either.


Hm that's really not what we want, indeed :-)
Just forget about what I said.


> 
> -- Steve
> 


^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2009-05-01 22:15 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-01  2:22 [PATCH 0/3] [GIT PULL] tracing/ringbuffer: updates for tip Steven Rostedt
2009-05-01  2:22 ` [PATCH 1/3] ring-buffer: add counters for commit overrun and nmi dropped entries Steven Rostedt
2009-05-01  3:00   ` Andrew Morton
2009-05-01  3:11     ` Steven Rostedt
2009-05-01  3:22       ` Andrew Morton
2009-05-01  3:33         ` Steven Rostedt
2009-05-01  3:38           ` Andrew Morton
2009-05-01  3:55             ` Steven Rostedt
2009-05-01  2:22 ` [PATCH 2/3] tracing: export stats of ring buffers to userspace Steven Rostedt
2009-05-01  3:08   ` Frederic Weisbecker
2009-05-01  3:23     ` Steven Rostedt
2009-05-01 12:43       ` Frederic Weisbecker
2009-05-01  2:22 ` [PATCH 3/3] ring-buffer: make cpu buffer entries counter atomic Steven Rostedt
2009-05-01  3:06   ` Andrew Morton
2009-05-01  3:28     ` Steven Rostedt
2009-05-01 11:50   ` Ingo Molnar
2009-05-01 13:42     ` Frederic Weisbecker
2009-05-01 14:28       ` Steven Rostedt
2009-05-01 22:10         ` Frederic Weisbecker
2009-05-01 14:23     ` Steven Rostedt
2009-05-01 16:14       ` Steven Rostedt
2009-05-01 16:20         ` Ingo Molnar
2009-05-01 16:31           ` Steven Rostedt
2009-05-01 16:32             ` Steven Rostedt
2009-05-01 16:52             ` Ingo Molnar
2009-05-01 17:11               ` Steven Rostedt
2009-05-01 17:14                 ` Ingo Molnar
2009-05-01 17:36                   ` Steven Rostedt
2009-05-01 17:42                     ` Ingo Molnar
2009-05-01 17:56                       ` Steven Rostedt
2009-05-01 21:17                         ` Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox