From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Andrew Morton <akpm@linux-foundation.org>,
Vincent Donnefort <vdonnefort@google.com>,
Joel Fernandes <joel@joelfernandes.org>,
Daniel Bristot de Oliveira <bristot@redhat.com>,
Ingo Molnar <mingo@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
suleiman@google.com, Thomas Gleixner <tglx@linutronix.de>,
Vineeth Pillai <vineeth@bitbyteword.org>,
Youssef Esmat <youssefesmat@google.com>,
Beau Belgrave <beaub@linux.microsoft.com>,
Alexander Graf <graf@amazon.com>, Baoquan He <bhe@redhat.com>,
Borislav Petkov <bp@alien8.de>,
"Paul E. McKenney" <paulmck@kernel.org>,
David Howells <dhowells@redhat.com>,
Mike Rapoport <rppt@kernel.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Tony Luck <tony.luck@intel.com>,
Guenter Roeck <linux@roeck-us.net>,
Ross Zwisler <zwisler@google.com>,
Kees Cook <keescook@chromium.org>
Subject: [PATCH v6 03/13] ring-buffer: Add ring_buffer_meta data
Date: Wed, 12 Jun 2024 19:19:37 -0400 [thread overview]
Message-ID: <20240612232025.530733577@goodmis.org> (raw)
In-Reply-To: 20240612231934.608252486@goodmis.org
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Populate the ring_buffer_meta array. It holds the pointer to the
head_buffer (next to read), the commit_buffer (next to write) the size of
the sub-buffers, number of sub-buffers and an array that keeps track of
the order of the sub-buffers.
This information will be stored in the persistent memory to help on reboot
to reconstruct the ring buffer.
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 209 ++++++++++++++++++++++++++++++++-----
1 file changed, 184 insertions(+), 25 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6edd70e45807..0e3ed7f1cc5d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -43,6 +43,11 @@
static void update_pages_handler(struct work_struct *work);
struct ring_buffer_meta {
+ unsigned long head_buffer;
+ unsigned long commit_buffer;
+ __u32 subbuf_size;
+ __u32 nr_subbufs;
+ int buffers[];
};
/*
@@ -501,6 +506,7 @@ struct ring_buffer_per_cpu {
struct mutex mapping_lock;
unsigned long *subbuf_ids; /* ID to subbuf VA */
struct trace_buffer_meta *meta_page;
+ struct ring_buffer_meta *ring_meta;
/* ring buffer pages to update, > 0 to add, < 0 to remove */
long nr_pages_to_update;
@@ -1261,6 +1267,11 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
* Set the previous list pointer to have the HEAD flag.
*/
rb_set_list_to_head(head->list.prev);
+
+ if (cpu_buffer->ring_meta) {
+ struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ meta->head_buffer = (unsigned long)head->page;
+ }
}
static void rb_list_head_clear(struct list_head *list)
@@ -1515,51 +1526,127 @@ rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs)
}
/*
- * Return a specific sub-buffer for a given @cpu defined by @idx.
+ * Return the ring_buffer_meta for a given @cpu.
*/
-static void *rb_range_buffer(struct trace_buffer *buffer, int cpu, int nr_pages, int idx)
+static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu)
{
- unsigned long ptr;
int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE;
+ unsigned long ptr = buffer->range_addr_start;
+ struct ring_buffer_meta *meta;
int nr_subbufs;
- /* Include the reader page */
- nr_subbufs = nr_pages + 1;
+ if (!ptr)
+ return NULL;
+
+ /* When nr_pages passed in is zero, the first meta has already been initialized */
+ if (!nr_pages) {
+ meta = (struct ring_buffer_meta *)ptr;
+ nr_subbufs = meta->nr_subbufs;
+ } else {
+ meta = NULL;
+ /* Include the reader page */
+ nr_subbufs = nr_pages + 1;
+ }
/*
* The first chunk may not be subbuffer aligned, where as
* the rest of the chunks are.
*/
- ptr = buffer->range_addr_start;
- ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
if (cpu) {
- unsigned long p;
-
- ptr += subbuf_size * nr_subbufs;
-
- /* Save the beginning of this CPU chunk */
- p = ptr;
-
ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
+ ptr += subbuf_size * nr_subbufs;
/* We can use multiplication to find chunks greater than 1 */
if (cpu > 1) {
unsigned long size;
+ unsigned long p;
+ /* Save the beginning of this CPU chunk */
+ p = ptr;
+ ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
ptr += subbuf_size * nr_subbufs;
/* Now all chunks after this are the same size */
size = ptr - p;
ptr += size * (cpu - 2);
-
- ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
}
}
- if (ptr + subbuf_size * nr_subbufs > buffer->range_addr_end)
+ return (void *)ptr;
+}
+
+/* Return the start of subbufs given the meta pointer */
+static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta)
+{
+ int subbuf_size = meta->subbuf_size;
+ unsigned long ptr;
+
+ ptr = (unsigned long)meta;
+ ptr = rb_range_align_subbuf(ptr, subbuf_size, meta->nr_subbufs);
+
+ return (void *)ptr;
+}
+
+/*
+ * Return a specific sub-buffer for a given @cpu defined by @idx.
+ */
+static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx)
+{
+ struct ring_buffer_meta *meta;
+ unsigned long ptr;
+ int subbuf_size;
+
+ meta = rb_range_meta(cpu_buffer->buffer, 0, cpu_buffer->cpu);
+ if (!meta)
+ return NULL;
+
+ if (WARN_ON_ONCE(idx >= meta->nr_subbufs))
return NULL;
+
+ subbuf_size = meta->subbuf_size;
+
+ /* Map this buffer to the order that's in meta->buffers[] */
+ idx = meta->buffers[idx];
+
+ ptr = (unsigned long)rb_subbufs_from_meta(meta);
+
+ ptr += subbuf_size * idx;
+ if (ptr + subbuf_size > cpu_buffer->buffer->range_addr_end)
+ return NULL;
+
return (void *)ptr;
}
+static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
+{
+ struct ring_buffer_meta *meta;
+ void *subbuf;
+ int cpu;
+ int i;
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+ meta = rb_range_meta(buffer, nr_pages, cpu);
+
+ meta->nr_subbufs = nr_pages + 1;
+ meta->subbuf_size = PAGE_SIZE;
+
+ subbuf = rb_subbufs_from_meta(meta);
+
+ /*
+ * The buffers[] array holds the order of the sub-buffers
+ * that are after the meta data. The sub-buffers may
+ * be swapped out when read and inserted into a different
+ * location of the ring buffer. Although their addresses
+ * remain the same, the buffers[] array contains the
+ * index into the sub-buffers holding their actual order.
+ */
+ for (i = 0; i < meta->nr_subbufs; i++) {
+ meta->buffers[i] = i;
+ rb_init_page(subbuf);
+ subbuf += meta->subbuf_size;
+ }
+ }
+}
+
static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
long nr_pages, struct list_head *pages)
{
@@ -1600,7 +1687,6 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
set_current_oom_origin();
for (i = 0; i < nr_pages; i++) {
struct page *page;
- int cpu = cpu_buffer->cpu;
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
mflags, cpu_to_node(cpu_buffer->cpu));
@@ -1617,10 +1703,11 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
if (buffer->range_addr_start) {
/* A range was given. Use that for the buffer page */
- bpage->page = rb_range_buffer(buffer, cpu, nr_pages, i + 1);
+ bpage->page = rb_range_buffer(cpu_buffer, i + 1);
if (!bpage->page)
goto free_pages;
bpage->range = 1;
+ bpage->id = i + 1;
} else {
page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
mflags | __GFP_COMP | __GFP_ZERO,
@@ -1628,9 +1715,9 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
if (!page)
goto free_pages;
bpage->page = page_address(page);
+ rb_init_page(bpage->page);
}
bpage->order = cpu_buffer->buffer->subbuf_order;
- rb_init_page(bpage->page);
if (user_thread && fatal_signal_pending(current))
goto free_pages;
@@ -1711,7 +1798,13 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
cpu_buffer->reader_page = bpage;
if (buffer->range_addr_start) {
- bpage->page = rb_range_buffer(buffer, cpu, nr_pages, 0);
+ /*
+ * Range mapped buffers have the same restrictions as memory
+ * mapped ones do.
+ */
+ cpu_buffer->mapped = 1;
+ cpu_buffer->ring_meta = rb_range_meta(buffer, nr_pages, cpu);
+ bpage->page = rb_range_buffer(cpu_buffer, 0);
if (!bpage->page)
goto fail_free_reader;
bpage->range = 1;
@@ -1722,8 +1815,8 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
if (!page)
goto fail_free_reader;
bpage->page = page_address(page);
+ rb_init_page(bpage->page);
}
- rb_init_page(bpage->page);
INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
INIT_LIST_HEAD(&cpu_buffer->new_pages);
@@ -1737,6 +1830,10 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
rb_head_page_activate(cpu_buffer);
+ if (cpu_buffer->ring_meta) {
+ struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ meta->commit_buffer = meta->head_buffer;
+ }
return cpu_buffer;
@@ -1856,6 +1953,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
nr_pages--;
buffer->range_addr_start = start;
buffer->range_addr_end = end;
+
+ rb_range_meta_init(buffer, nr_pages);
} else {
/* need at least two pages */
@@ -2544,6 +2643,52 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
iter->next_event = 0;
}
+/* Return the index into the sub-buffers for a given sub-buffer */
+static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf)
+{
+ void *subbuf_array;
+
+ subbuf_array = (void *)meta + sizeof(int) * meta->nr_subbufs;
+ subbuf_array = (void *)ALIGN((unsigned long)subbuf_array, meta->subbuf_size);
+ return (subbuf - subbuf_array) / meta->subbuf_size;
+}
+
+static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *next_page)
+{
+ struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ unsigned long old_head = (unsigned long)next_page->page;
+ unsigned long new_head;
+
+ rb_inc_page(&next_page);
+ new_head = (unsigned long)next_page->page;
+
+ /*
+ * Only move it forward once, if something else came in and
+ * moved it forward, then we don't want to touch it.
+ */
+ (void)cmpxchg(&meta->head_buffer, old_head, new_head);
+}
+
+static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *reader)
+{
+ struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ void *old_reader = cpu_buffer->reader_page->page;
+ void *new_reader = reader->page;
+ int id;
+
+ id = reader->id;
+ cpu_buffer->reader_page->id = id;
+ reader->id = 0;
+
+ meta->buffers[0] = rb_meta_subbuf_idx(meta, new_reader);
+ meta->buffers[id] = rb_meta_subbuf_idx(meta, old_reader);
+
+ /* The head pointer is the one after the reader */
+ rb_update_meta_head(cpu_buffer, reader);
+}
+
/*
* rb_handle_head_page - writer hit the head page
*
@@ -2593,6 +2738,8 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
+ if (cpu_buffer->ring_meta)
+ rb_update_meta_head(cpu_buffer, next_page);
/*
* The entries will be zeroed out when we move the
* tail page.
@@ -3154,6 +3301,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
rb_inc_page(&cpu_buffer->commit_page);
+ if (cpu_buffer->ring_meta) {
+ struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ meta->commit_buffer = (unsigned long)cpu_buffer->commit_page->page;
+ }
/* add barrier to keep gcc from optimizing too much */
barrier();
}
@@ -4771,6 +4922,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
if (!ret)
goto spin;
+ if (cpu_buffer->ring_meta)
+ rb_update_meta_reader(cpu_buffer, reader);
+
/*
* Yay! We succeeded in replacing the page.
*
@@ -5451,11 +5605,16 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->lost_events = 0;
cpu_buffer->last_overrun = 0;
- if (cpu_buffer->user_mapped)
- rb_update_meta_page(cpu_buffer);
-
rb_head_page_activate(cpu_buffer);
cpu_buffer->pages_removed = 0;
+
+ if (cpu_buffer->mapped) {
+ rb_update_meta_page(cpu_buffer);
+ if (cpu_buffer->ring_meta) {
+ struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+ meta->commit_buffer = meta->head_buffer;
+ }
+ }
}
/* Must have disabled the cpu buffer then done a synchronize_rcu */
--
2.43.0
next prev parent reply other threads:[~2024-06-12 23:20 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-12 23:19 [PATCH v6 00/13] tracing: Persistent traces across a reboot or crash Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 01/13] ring-buffer: Allow mapped field to be set without mapping Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 02/13] ring-buffer: Add ring_buffer_alloc_range() Steven Rostedt
2024-06-12 23:19 ` Steven Rostedt [this message]
2024-06-12 23:19 ` [PATCH v6 04/13] tracing: Implement creating an instance based on a given memory region Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 05/13] ring-buffer: Add output of ring buffer meta page Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 06/13] ring-buffer: Add test if range of boot buffer is valid Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 07/13] ring-buffer: Validate boot range memory events Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 08/13] tracing: Add option to use memmapped memory for trace boot instance Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 09/13] ring-buffer: Save text and data locations in mapped meta data Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 10/13] tracing/ring-buffer: Add last_boot_info file to boot instance Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 11/13] tracing: Handle old buffer mappings for event strings and functions Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 12/13] tracing: Update function tracing output for previous boot buffer Steven Rostedt
2024-06-12 23:19 ` [PATCH v6 13/13] tracing: Add last boot delta offset for stack traces Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240612232025.530733577@goodmis.org \
--to=rostedt@goodmis.org \
--cc=akpm@linux-foundation.org \
--cc=beaub@linux.microsoft.com \
--cc=bhe@redhat.com \
--cc=bp@alien8.de \
--cc=bristot@redhat.com \
--cc=dave.hansen@linux.intel.com \
--cc=dhowells@redhat.com \
--cc=graf@amazon.com \
--cc=joel@joelfernandes.org \
--cc=keescook@chromium.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=linux@roeck-us.net \
--cc=mark.rutland@arm.com \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=mingo@kernel.org \
--cc=paulmck@kernel.org \
--cc=peterz@infradead.org \
--cc=rppt@kernel.org \
--cc=suleiman@google.com \
--cc=tglx@linutronix.de \
--cc=tony.luck@intel.com \
--cc=vdonnefort@google.com \
--cc=vineeth@bitbyteword.org \
--cc=youssefesmat@google.com \
--cc=zwisler@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.