All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [for-next][PATCH 07/21] ring-buffer: Have rb_iter_head_event() handle concurrent writer
Date: Sun, 29 Mar 2020 14:42:59 -0400	[thread overview]
Message-ID: <20200329184316.505376001@goodmis.org> (raw)
In-Reply-To: 20200329184252.289087453@goodmis.org

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

Have the ring_buffer_iter structure have a place to store an event, such
that it can not be overwritten by a writer, and load it in such a way via
rb_iter_head_event() that it will return NULL and reset the iter to the
start of the current page if a writer updated the page.

Link: http://lkml.kernel.org/r/20200317213416.306959216@goodmis.org

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 106 ++++++++++++++++++++++++++-----------
 1 file changed, 75 insertions(+), 31 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e689bdcb53e8..3d718add73c1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -503,11 +503,13 @@ struct trace_buffer {
 struct ring_buffer_iter {
 	struct ring_buffer_per_cpu	*cpu_buffer;
 	unsigned long			head;
+	unsigned long			next_event;
 	struct buffer_page		*head_page;
 	struct buffer_page		*cache_reader_page;
 	unsigned long			cache_read;
 	u64				read_stamp;
 	u64				page_stamp;
+	struct ring_buffer_event	*event;
 };
 
 /**
@@ -1914,15 +1916,59 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
 			       cpu_buffer->reader_page->read);
 }
 
-static __always_inline struct ring_buffer_event *
-rb_iter_head_event(struct ring_buffer_iter *iter)
+static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
 {
-	return __rb_page_index(iter->head_page, iter->head);
+	return local_read(&bpage->page->commit);
 }
 
-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
+static struct ring_buffer_event *
+rb_iter_head_event(struct ring_buffer_iter *iter)
 {
-	return local_read(&bpage->page->commit);
+	struct ring_buffer_event *event;
+	struct buffer_page *iter_head_page = iter->head_page;
+	unsigned long commit;
+	unsigned length;
+
+	/*
+	 * When the writer goes across pages, it issues a cmpxchg which
+	 * is a mb(), which will synchronize with the rmb here.
+	 * (see rb_tail_page_update() and __rb_reserve_next())
+	 */
+	commit = rb_page_commit(iter_head_page);
+	smp_rmb();
+	event = __rb_page_index(iter_head_page, iter->head);
+	length = rb_event_length(event);
+
+	/*
+	 * READ_ONCE() doesn't work on functions and we don't want the
+	 * compiler doing any crazy optimizations with length.
+	 */
+	barrier();
+
+	if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE)
+		/* Writer corrupted the read? */
+		goto reset;
+
+	memcpy(iter->event, event, length);
+	/*
+	 * If the page stamp is still the same after this rmb() then the
+	 * event was safely copied without the writer entering the page.
+	 */
+	smp_rmb();
+
+	/* Make sure the page didn't change since we read this */
+	if (iter->page_stamp != iter_head_page->page->time_stamp ||
+	    commit > rb_page_commit(iter_head_page))
+		goto reset;
+
+	iter->next_event = iter->head + length;
+	return iter->event;
+ reset:
+	/* Reset to the beginning */
+	iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
+	iter->head = 0;
+	iter->next_event = 0;
+	return NULL;
 }
 
 /* Size is determined by what has been committed */
@@ -1962,6 +2008,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
 
 	iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
 	iter->head = 0;
+	iter->next_event = 0;
 }
 
 /*
@@ -3548,6 +3595,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
 	/* Iterator usage is expected to have record disabled */
 	iter->head_page = cpu_buffer->reader_page;
 	iter->head = cpu_buffer->reader_page->read;
+	iter->next_event = iter->head;
 
 	iter->cache_reader_page = iter->head_page;
 	iter->cache_read = cpu_buffer->read;
@@ -3625,7 +3673,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
 		return 0;
 
 	/* Still racy, as it may return a false positive, but that's OK */
-	return ((iter->head_page == commit_page && iter->head == commit) ||
+	return ((iter->head_page == commit_page && iter->head >= commit) ||
 		(iter->head_page == reader && commit_page == head_page &&
 		 head_page->read == commit &&
 		 iter->head == rb_page_commit(cpu_buffer->reader_page)));
@@ -3853,15 +3901,22 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 static void rb_advance_iter(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
-	struct ring_buffer_event *event;
-	unsigned length;
 
 	cpu_buffer = iter->cpu_buffer;
 
+	/* If head == next_event then we need to jump to the next event */
+	if (iter->head == iter->next_event) {
+		/* If the event gets overwritten again, there's nothing to do */
+		if (rb_iter_head_event(iter) == NULL)
+			return;
+	}
+
+	iter->head = iter->next_event;
+
 	/*
 	 * Check if we are at the end of the buffer.
 	 */
-	if (iter->head >= rb_page_size(iter->head_page)) {
+	if (iter->next_event >= rb_page_size(iter->head_page)) {
 		/* discarded commits can make the page empty */
 		if (iter->head_page == cpu_buffer->commit_page)
 			return;
@@ -3869,27 +3924,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 		return;
 	}
 
-	event = rb_iter_head_event(iter);
-
-	length = rb_event_length(event);
-
-	/*
-	 * This should not be called to advance the header if we are
-	 * at the tail of the buffer.
-	 */
-	if (RB_WARN_ON(cpu_buffer,
-		       (iter->head_page == cpu_buffer->commit_page) &&
-		       (iter->head + length > rb_commit_index(cpu_buffer))))
-		return;
-
-	rb_update_iter_read_stamp(iter, event);
-
-	iter->head += length;
-
-	/* check for end of page padding */
-	if ((iter->head >= rb_page_size(iter->head_page)) &&
-	    (iter->head_page != cpu_buffer->commit_page))
-		rb_inc_iter(iter);
+	rb_update_iter_read_stamp(iter, iter->event);
 }
 
 static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
@@ -4017,6 +4052,8 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	}
 
 	event = rb_iter_head_event(iter);
+	if (!event)
+		goto again;
 
 	switch (event->type_len) {
 	case RINGBUF_TYPE_PADDING:
@@ -4233,10 +4270,16 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
-	iter = kmalloc(sizeof(*iter), flags);
+	iter = kzalloc(sizeof(*iter), flags);
 	if (!iter)
 		return NULL;
 
+	iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags);
+	if (!iter->event) {
+		kfree(iter);
+		return NULL;
+	}
+
 	cpu_buffer = buffer->buffers[cpu];
 
 	iter->cpu_buffer = cpu_buffer;
@@ -4317,6 +4360,7 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
 
 	atomic_dec(&cpu_buffer->record_disabled);
 	atomic_dec(&cpu_buffer->buffer->resize_disabled);
+	kfree(iter->event);
 	kfree(iter);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_finish);
-- 
2.25.1



  parent reply	other threads:[~2020-03-29 18:44 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-29 18:42 [for-next][PATCH 00/21] tracing: Updates for v5.7 Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 01/21] tracing: Use address-of operator on section symbols Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 02/21] selftest/ftrace: Fix function trigger test to handle trace not disabling the tracer Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 03/21] tracing: Save off entry when peeking at next entry Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 04/21] ring-buffer: Have ring_buffer_empty() not depend on tracing stopped Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 05/21] ring-buffer: Rename ring_buffer_read() to read_buffer_iter_advance() Steven Rostedt
2020-03-29 18:42 ` [for-next][PATCH 06/21] ring-buffer: Add page_stamp to iterator for synchronization Steven Rostedt
2020-03-29 18:42 ` Steven Rostedt [this message]
2020-03-29 18:43 ` [for-next][PATCH 08/21] ring-buffer: Do not die if rb_iter_peek() fails more than thrice Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 09/21] ring-buffer: Optimize rb_iter_head_event() Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 10/21] ring-buffer: Make resize disable per cpu buffer instead of total buffer Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 11/21] ring-buffer: Do not disable recording when there is an iterator Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 12/21] tracing: Do not disable tracing when reading the trace file Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 13/21] ring-buffer/tracing: Have iterator acknowledge dropped events Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 14/21] tracing: Have the document reflect that the trace file keeps tracing enabled Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 15/21] ftrace/kprobe: Show the maxactive number on kprobe_events Steven Rostedt
2020-03-31 13:11   ` Sasha Levin
2020-03-29 18:43 ` [for-next][PATCH 16/21] ftrace: Make function trace pid filtering a bit more exact Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 17/21] ftrace: Create set_ftrace_notrace_pid to not trace tasks Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 18/21] tracing: Create set_event_notrace_pid " Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 19/21] selftests/ftrace: Add test to test new set_ftrace_notrace_pid file Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 20/21] selftests/ftrace: Add test to test new set_event_notrace_pid file Steven Rostedt
2020-03-29 18:43 ` [for-next][PATCH 21/21] tracing: Add documentation on set_ftrace_notrace_pid and set_event_notrace_pid Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200329184316.505376001@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.