* [PATCH v20 01/10] ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-20 18:49 ` [PATCH v20 02/10] ring-buffer: Skip invalid sub-buffers when rewinding " Steven Rostedt
` (9 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Skip invalid sub-buffers when validating the persistent ring buffer
instead of discarding the entire ring buffer. Only skipped buffers
are invalidated (cleared).
If the cache data in memory fails to be synchronized during a reboot,
the persistent ring buffer may become partially corrupted, but other
sub-buffers may still contain readable event data. Only discard the
subbuffers that are found to be corrupted.
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 116 ++++++++++++++++++++++---------------
1 file changed, 70 insertions(+), 46 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 4c0cf6ac0161..97ef702655f5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -370,6 +370,12 @@ static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
return local_read(&bpage->page->commit);
}
+/* Size is determined by what has been committed */
+static __always_inline unsigned int rb_page_size(struct buffer_page *bpage)
+{
+ return rb_page_commit(bpage) & ~RB_MISSED_MASK;
+}
+
static void free_buffer_page(struct buffer_page *bpage)
{
/* Range pages are not to be freed */
@@ -1762,7 +1768,6 @@ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
unsigned long *subbuf_mask)
{
int subbuf_size = PAGE_SIZE;
- struct buffer_data_page *subbuf;
unsigned long buffers_start;
unsigned long buffers_end;
int i;
@@ -1770,6 +1775,11 @@ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
if (!subbuf_mask)
return false;
+ if (meta->subbuf_size != PAGE_SIZE) {
+ pr_info("Ring buffer boot meta [%d] invalid subbuf_size\n", cpu);
+ return false;
+ }
+
buffers_start = meta->first_buffer;
buffers_end = meta->first_buffer + (subbuf_size * meta->nr_subbufs);
@@ -1786,11 +1796,12 @@ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
return false;
}
- subbuf = rb_subbufs_from_meta(meta);
-
bitmap_clear(subbuf_mask, 0, meta->nr_subbufs);
- /* Is the meta buffers and the subbufs themselves have correct data? */
+ /*
+ * Ensure the meta::buffers array has correct data. The data in each subbufs
+ * are checked later in rb_meta_validate_events().
+ */
for (i = 0; i < meta->nr_subbufs; i++) {
if (meta->buffers[i] < 0 ||
meta->buffers[i] >= meta->nr_subbufs) {
@@ -1798,18 +1809,12 @@ static bool rb_cpu_meta_valid(struct ring_buffer_cpu_meta *meta, int cpu,
return false;
}
- if ((unsigned)local_read(&subbuf->commit) > subbuf_size) {
- pr_info("Ring buffer boot meta [%d] buffer invalid commit\n", cpu);
- return false;
- }
-
if (test_bit(meta->buffers[i], subbuf_mask)) {
pr_info("Ring buffer boot meta [%d] array has duplicates\n", cpu);
return false;
}
set_bit(meta->buffers[i], subbuf_mask);
- subbuf = (void *)subbuf + subbuf_size;
}
return true;
@@ -1873,13 +1878,22 @@ static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu
return events;
}
-static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu)
+static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu,
+ struct ring_buffer_cpu_meta *meta)
{
unsigned long long ts;
+ unsigned long tail;
u64 delta;
- int tail;
- tail = local_read(&dpage->commit);
+ /*
+ * When a sub-buffer is recovered from a read, the commit value may
+ * have RB_MISSED_* bits set, as these bits are reset on reuse.
+ * Even after clearing these bits, a commit value greater than the
+ * subbuf_size is considered invalid.
+ */
+ tail = local_read(&dpage->commit) & ~RB_MISSED_MASK;
+ if (tail > meta->subbuf_size - BUF_PAGE_HDR_SIZE)
+ return -1;
return rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
}
@@ -1890,6 +1904,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
struct buffer_page *head_page, *orig_head, *orig_reader;
unsigned long entry_bytes = 0;
unsigned long entries = 0;
+ int discarded = 0;
int ret;
u64 ts;
int i;
@@ -1901,14 +1916,19 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
orig_reader = cpu_buffer->reader_page;
/* Do the reader page first */
- ret = rb_validate_buffer(orig_reader->page, cpu_buffer->cpu);
+ ret = rb_validate_buffer(orig_reader->page, cpu_buffer->cpu, meta);
if (ret < 0) {
- pr_info("Ring buffer reader page is invalid\n");
- goto invalid;
+ pr_info("Ring buffer meta [%d] invalid reader page detected\n",
+ cpu_buffer->cpu);
+ discarded++;
+ /* Instead of discard whole ring buffer, discard only this sub-buffer. */
+ local_set(&orig_reader->entries, 0);
+ local_set(&orig_reader->page->commit, 0);
+ } else {
+ entries += ret;
+ entry_bytes += rb_page_size(orig_reader);
+ local_set(&orig_reader->entries, ret);
}
- entries += ret;
- entry_bytes += local_read(&orig_reader->page->commit);
- local_set(&orig_reader->entries, ret);
ts = head_page->page->time_stamp;
@@ -1936,7 +1956,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
break;
/* Stop rewind if the page is invalid. */
- ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu);
+ ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, meta);
if (ret < 0)
break;
@@ -1945,7 +1965,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
if (ret)
local_inc(&cpu_buffer->pages_touched);
entries += ret;
- entry_bytes += rb_page_commit(head_page);
+ entry_bytes += rb_page_size(head_page);
}
if (i)
pr_info("Ring buffer [%d] rewound %d pages\n", cpu_buffer->cpu, i);
@@ -2015,21 +2035,24 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
if (head_page == orig_reader)
continue;
- ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu);
+ ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, meta);
if (ret < 0) {
- pr_info("Ring buffer meta [%d] invalid buffer page\n",
- cpu_buffer->cpu);
- goto invalid;
- }
-
- /* If the buffer has content, update pages_touched */
- if (ret)
- local_inc(&cpu_buffer->pages_touched);
-
- entries += ret;
- entry_bytes += local_read(&head_page->page->commit);
- local_set(&head_page->entries, ret);
+ if (!discarded)
+ pr_info("Ring buffer meta [%d] invalid buffer page detected\n",
+ cpu_buffer->cpu);
+ discarded++;
+ /* Instead of discard whole ring buffer, discard only this sub-buffer. */
+ local_set(&head_page->entries, 0);
+ local_set(&head_page->page->commit, 0);
+ } else {
+ /* If the buffer has content, update pages_touched */
+ if (ret)
+ local_inc(&cpu_buffer->pages_touched);
+ entries += ret;
+ entry_bytes += rb_page_size(head_page);
+ local_set(&head_page->entries, ret);
+ }
if (head_page == cpu_buffer->commit_page)
break;
}
@@ -2043,7 +2066,10 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
local_set(&cpu_buffer->entries, entries);
local_set(&cpu_buffer->entries_bytes, entry_bytes);
- pr_info("Ring buffer meta [%d] is from previous boot!\n", cpu_buffer->cpu);
+ pr_info("Ring buffer meta [%d] is from previous boot!", cpu_buffer->cpu);
+ if (discarded)
+ pr_cont(" (%d pages discarded)", discarded);
+ pr_cont("\n");
return;
invalid:
@@ -3330,12 +3356,6 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
return NULL;
}
-/* Size is determined by what has been committed */
-static __always_inline unsigned rb_page_size(struct buffer_page *bpage)
-{
- return rb_page_commit(bpage) & ~RB_MISSED_MASK;
-}
-
static __always_inline unsigned
rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
{
@@ -5635,8 +5655,9 @@ __rb_get_reader_page_from_remote(struct ring_buffer_per_cpu *cpu_buffer)
static struct buffer_page *
__rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
- struct buffer_page *reader = NULL;
+ int max_loops = cpu_buffer->ring_meta ? cpu_buffer->nr_pages : 3;
unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size);
+ struct buffer_page *reader = NULL;
unsigned long overwrite;
unsigned long flags;
int nr_loops = 0;
@@ -5648,11 +5669,14 @@ __rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
again:
/*
* This should normally only loop twice. But because the
- * start of the reader inserts an empty page, it causes
- * a case where we will loop three times. There should be no
- * reason to loop four times (that I know of).
+ * start of the reader inserts an empty page, it causes a
+ * case where we will loop three times. There should be no
+ * reason to loop four times unless the ring buffer is a
+ * recovered persistent ring buffer. For persistent ring buffers,
+ * invalid pages are reset during recovery, so there may be more
+ * than 3 contiguous pages can be empty, but less than nr_pages.
*/
- if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
+ if (RB_WARN_ON(cpu_buffer, ++nr_loops > max_loops)) {
reader = NULL;
goto out;
}
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v20 02/10] ring-buffer: Skip invalid sub-buffers when rewinding persistent ring buffer
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
2026-05-20 18:49 ` [PATCH v20 01/10] ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-21 14:17 ` Steven Rostedt
2026-05-20 18:49 ` [PATCH v20 03/10] ring-buffer: Add persistent ring buffer invalid-page inject test Steven Rostedt
` (8 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Skip invalid sub-buffers when rewinding the persistent ring buffer
instead of stopping the rewinding the ring buffer. The skipped
buffers are cleared.
To ensure the rewinding stops at the unused page, this also clears
buffer_data_page::time_stamp when tracing resets the buffer. This
allows us to identify unused pages and empty pages.
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 102 +++++++++++++++++++++++--------------
1 file changed, 63 insertions(+), 39 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 97ef702655f5..dca27ed6a3a1 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -363,6 +363,7 @@ struct buffer_page {
static void rb_init_page(struct buffer_data_page *bpage)
{
local_set(&bpage->commit, 0);
+ bpage->time_stamp = 0;
}
static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
@@ -1878,12 +1879,14 @@ static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu
return events;
}
-static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu,
- struct ring_buffer_cpu_meta *meta)
+static int rb_validate_buffer(struct buffer_page *bpage, int cpu,
+ struct ring_buffer_cpu_meta *meta, u64 prev_ts, u64 next_ts)
{
+ struct buffer_data_page *dpage = bpage->page;
unsigned long long ts;
unsigned long tail;
u64 delta;
+ int ret;
/*
* When a sub-buffer is recovered from a read, the commit value may
@@ -1892,9 +1895,27 @@ static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu,
* subbuf_size is considered invalid.
*/
tail = local_read(&dpage->commit) & ~RB_MISSED_MASK;
- if (tail > meta->subbuf_size - BUF_PAGE_HDR_SIZE)
- return -1;
- return rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
+ if (tail <= meta->subbuf_size - BUF_PAGE_HDR_SIZE)
+ ret = rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
+ else
+ ret = -1;
+
+ /*
+ * The timestamp must be greater than @prev_ts and smaller than @next_ts.
+ * Since this function works in both forward (verify) and reverse (unwind)
+ * loop, we don't know both @prev_ts and @next_ts at the same time.
+ * So use the known boundary as the boundary.
+ */
+ if (ret < 0 || (prev_ts && prev_ts > ts) || (next_ts && ts > next_ts)) {
+ local_set(&bpage->entries, 0);
+ local_set(&dpage->commit, 0);
+ dpage->time_stamp = prev_ts ? prev_ts : next_ts;
+ ret = -1;
+ } else {
+ local_set(&bpage->entries, ret);
+ }
+
+ return ret;
}
/* If the meta data has been validated, now validate the events */
@@ -1915,25 +1936,29 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
orig_head = head_page = cpu_buffer->head_page;
orig_reader = cpu_buffer->reader_page;
- /* Do the reader page first */
- ret = rb_validate_buffer(orig_reader->page, cpu_buffer->cpu, meta);
+ /* Do the head page first */
+ ret = rb_validate_buffer(head_page, cpu_buffer->cpu, meta, 0, 0);
+ if (ret < 0) {
+ pr_info("Ring buffer meta [%d] invalid head page detected\n",
+ cpu_buffer->cpu);
+ goto skip_rewind;
+ }
+ ts = head_page->page->time_stamp;
+
+ /* Do the reader page - reader must be previous to head. */
+ ret = rb_validate_buffer(orig_reader, cpu_buffer->cpu, meta, 0, ts);
if (ret < 0) {
pr_info("Ring buffer meta [%d] invalid reader page detected\n",
cpu_buffer->cpu);
discarded++;
- /* Instead of discard whole ring buffer, discard only this sub-buffer. */
- local_set(&orig_reader->entries, 0);
- local_set(&orig_reader->page->commit, 0);
} else {
entries += ret;
entry_bytes += rb_page_size(orig_reader);
- local_set(&orig_reader->entries, ret);
+ ts = orig_reader->page->time_stamp;
}
- ts = head_page->page->time_stamp;
-
/*
- * Try to rewind the head so that we can read the pages which already
+ * Try to rewind the head so that we can read the pages which are already
* read in the previous boot.
*/
if (head_page == cpu_buffer->tail_page)
@@ -1946,26 +1971,27 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
if (head_page == cpu_buffer->tail_page)
break;
- /* Ensure the page has older data than head. */
- if (ts < head_page->page->time_stamp)
+ /* Rewind until unused page (no timestamp, no commit). */
+ if (!head_page->page->time_stamp && rb_page_commit(head_page) == 0)
break;
- ts = head_page->page->time_stamp;
- /* Ensure the page has correct timestamp and some data. */
- if (!ts || rb_page_commit(head_page) == 0)
- break;
-
- /* Stop rewind if the page is invalid. */
- ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, meta);
- if (ret < 0)
- break;
-
- /* Recover the number of entries and update stats. */
- local_set(&head_page->entries, ret);
- if (ret)
- local_inc(&cpu_buffer->pages_touched);
- entries += ret;
- entry_bytes += rb_page_size(head_page);
+ /*
+ * Skip if the page is invalid, or its timestamp is newer than the
+ * previous valid page.
+ */
+ ret = rb_validate_buffer(head_page, cpu_buffer->cpu, meta, 0, ts);
+ if (ret < 0) {
+ if (!discarded)
+ pr_info("Ring buffer meta [%d] invalid buffer page detected\n",
+ cpu_buffer->cpu);
+ discarded++;
+ } else {
+ entries += ret;
+ entry_bytes += rb_page_size(head_page);
+ if (ret > 0)
+ local_inc(&cpu_buffer->pages_touched);
+ ts = head_page->page->time_stamp;
+ }
}
if (i)
pr_info("Ring buffer [%d] rewound %d pages\n", cpu_buffer->cpu, i);
@@ -2027,6 +2053,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
/* Nothing more to do, the only page is the reader page */
goto done;
}
+ ts = head_page->page->time_stamp;
/* Iterate until finding the commit page */
for (i = 0; i < meta->nr_subbufs + 1; i++, rb_inc_page(&head_page)) {
@@ -2035,15 +2062,12 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
if (head_page == orig_reader)
continue;
- ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu, meta);
+ ret = rb_validate_buffer(head_page, cpu_buffer->cpu, meta, ts, 0);
if (ret < 0) {
if (!discarded)
pr_info("Ring buffer meta [%d] invalid buffer page detected\n",
cpu_buffer->cpu);
discarded++;
- /* Instead of discard whole ring buffer, discard only this sub-buffer. */
- local_set(&head_page->entries, 0);
- local_set(&head_page->page->commit, 0);
} else {
/* If the buffer has content, update pages_touched */
if (ret)
@@ -2051,7 +2075,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
entries += ret;
entry_bytes += rb_page_size(head_page);
- local_set(&head_page->entries, ret);
+ ts = head_page->page->time_stamp;
}
if (head_page == cpu_buffer->commit_page)
break;
@@ -2079,12 +2103,12 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
/* Reset the reader page */
local_set(&cpu_buffer->reader_page->entries, 0);
- local_set(&cpu_buffer->reader_page->page->commit, 0);
+ rb_init_page(cpu_buffer->reader_page->page);
/* Reset all the subbuffers */
for (i = 0; i < meta->nr_subbufs - 1; i++, rb_inc_page(&head_page)) {
local_set(&head_page->entries, 0);
- local_set(&head_page->page->commit, 0);
+ rb_init_page(head_page->page);
}
}
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v20 02/10] ring-buffer: Skip invalid sub-buffers when rewinding persistent ring buffer
2026-05-20 18:49 ` [PATCH v20 02/10] ring-buffer: Skip invalid sub-buffers when rewinding " Steven Rostedt
@ 2026-05-21 14:17 ` Steven Rostedt
0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-21 14:17 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers, sashiko-bot@kernel.org,
sashiko-reviews@lists.linux.dev
[
Replying to Sashiko from v19: https://sashiko.dev/?list=org.kernel.vger.linux-trace-kernel#/patchset/177751968499.2136606.17388366710182662849.stgit%40mhiramat.tok.corp.google.com
]
> > @@ -1892,9 +1895,27 @@ static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu,
> > * subbuf_size is considered invalid.
> > */
> > tail = local_read(&dpage->commit) & ~RB_MISSED_MASK;
> > - if (tail > meta->subbuf_size - BUF_PAGE_HDR_SIZE)
> > - return -1;
> > - return rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
> > + if (tail <= meta->subbuf_size - BUF_PAGE_HDR_SIZE)
> > + ret = rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
> Since tail is read from untrusted persistent memory, does this call evaluating
The persistent memory is not untrusted.
> event headers verify enough bytes remain in the sub-buffer?
> If tail is set to the maximum allowed value, and preceding events are crafted
> so that the last event starts at tail - 1, the event pointer inside
> rb_read_data_buffer() will point to the very last byte.
> Calling rb_event_length(event) will evaluate event->type_len via a 32-bit
> read which could read past the page boundary and cause a regression if the
> adjacent page is unmapped.
> Should the parsing loop verify tail - e >= sizeof(struct ring_buffer_event)
> before evaluating the event?
> > + else
> > + ret = -1;
> > +
> > + /*
> > + * The timestamp must be greater than @prev_ts and smaller than @next_ts.
> > + * Since this function works in both forward (verify) and reverse (unwind)
> > + * loop, we don't know both @prev_ts and @next_ts at the same time.
> > + * So use the known boundary as the boundary.
> > + */
> > + if (ret < 0 || (prev_ts && prev_ts > ts) || (next_ts && ts > next_ts)) {
> Does this timestamp validation properly enforce strictly monotonic page ordering?
> Inside rb_validate_buffer(), ts is updated by rb_read_data_buffer() to
> the timestamp of the last event in the current page. The condition
> (prev_ts && prev_ts > ts) only checks that the previous page's start time
> is not greater than the current page's end time.
> Could a corrupted page with a start timestamp earlier than the previous
> page incorrectly pass validation if its final event timestamp is large enough?
> Would it prevent this regression to enforce that the current page starts after
> the previous page by checking prev_ts > dpage->time_stamp?
This may be an enhancement but shouldn't be critical for what is happening now.
> > + local_set(&bpage->entries, 0);
> [ ... ]
> > @@ -1915,25 +1936,29 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
> > orig_head = head_page = cpu_buffer->head_page;
> > orig_reader = cpu_buffer->reader_page;
> >
> > - /* Do the reader page first */
> > - ret = rb_validate_buffer(orig_reader->page, cpu_buffer->cpu, meta);
> > + /* Do the head page first */
> > + ret = rb_validate_buffer(head_page, cpu_buffer->cpu, meta, 0, 0);
> > + if (ret < 0) {
> > + pr_info("Ring buffer meta [%d] invalid head page detected\n",
> > + cpu_buffer->cpu);
> > + goto skip_rewind;
> > + }
> > + ts = head_page->page->time_stamp;
> > +
> > + /* Do the reader page - reader must be previous to head. */
> > + ret = rb_validate_buffer(orig_reader, cpu_buffer->cpu, meta, 0, ts);
> If rb_validate_buffer() for the head page fails, we take the goto skip_rewind
> path. Since skip_rewind jumps past this orig_reader validation, and the
> iteration loop explicitly skips orig_reader:
> /* Iterate until finding the commit page */
> for (i = 0; i < meta->nr_subbufs + 1; i++, rb_inc_page(&head_page)) {
> /* The original reader page has already been checked/counted. */
> if (head_page == orig_reader)
> continue;
> does this mean the reader page escapes validation entirely, introducing a
> regression?
This is a bug and needs to be fixed. If the head page is invalid it
shouldn't skip the rewind but instead jump to the "invalid:" label.
> If an attacker provides a corrupted persistent ring buffer with an invalid
> head page, would the kernel blindly trust the unvalidated commit length
> and event data of the reader page?
Not worried about attackers but I am worried about corrupted memory.
-- Steve
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v20 03/10] ring-buffer: Add persistent ring buffer invalid-page inject test
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
2026-05-20 18:49 ` [PATCH v20 01/10] ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer Steven Rostedt
2026-05-20 18:49 ` [PATCH v20 02/10] ring-buffer: Skip invalid sub-buffers when rewinding " Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-20 18:49 ` [PATCH v20 04/10] ring-buffer: Show commit numbers in buffer_meta file Steven Rostedt
` (7 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Add a self-corrupting test for the persistent ring buffer.
This will inject an erroneous value to some sub-buffer pages (where
the index is even or multiples of 5) in the persistent ring buffer
when the kernel panics, and checks whether the number of detected
invalid pages and the total entry_bytes are the same as the recorded
values after reboot.
This ensures that the kernel can correctly recover a partially
corrupted persistent ring buffer after a reboot or panic.
The test only runs on the persistent ring buffer whose name is
"ptracingtest". The user has to fill it with events before a
kernel panic.
To run the test, enable CONFIG_RING_BUFFER_PERSISTENT_INJECT
and add the following kernel cmdline:
reserve_mem=20M:2M:trace trace_instance=ptracingtest^traceoff@trace
panic=1
Run the following commands after the 1st boot:
cd /sys/kernel/tracing/instances/ptracingtest
echo 1 > tracing_on
echo 1 > events/enable
sleep 3
echo c > /proc/sysrq-trigger
After panic message, the kernel will reboot and run the verification
on the persistent ring buffer, e.g.
Ring buffer meta [2] invalid buffer page detected
Ring buffer meta [2] is from previous boot! (318 pages discarded)
Ring buffer testing [2] invalid pages: PASSED (318/318)
Ring buffer testing [2] entry_bytes: PASSED (1300476/1300476)
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
include/linux/ring_buffer.h | 1 +
kernel/trace/Kconfig | 34 ++++++++++++++++
kernel/trace/ring_buffer.c | 79 +++++++++++++++++++++++++++++++++++++
kernel/trace/trace.c | 4 ++
4 files changed, 118 insertions(+)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 994f52b34344..0670742b2d60 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -238,6 +238,7 @@ int ring_buffer_subbuf_size_get(struct trace_buffer *buffer);
enum ring_buffer_flags {
RB_FL_OVERWRITE = 1 << 0,
+ RB_FL_TESTING = 1 << 1,
};
#ifdef CONFIG_RING_BUFFER
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e130da35808f..084f34dc6c9f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1202,6 +1202,40 @@ config RING_BUFFER_VALIDATE_TIME_DELTAS
Only say Y if you understand what this does, and you
still want it enabled. Otherwise say N
+config RING_BUFFER_PERSISTENT_INJECT
+ bool "Enable persistent ring buffer error injection test"
+ depends on RING_BUFFER
+ help
+ This option will have the kernel check if the persistent ring
+ buffer is named "ptracingtest". and if so, it will corrupt some
+ of its pages on a kernel panic. This is used to test if the
+ persistent ring buffer can recover from some of its sub-buffers
+ being corrupted.
+ To use this, boot a kernel with a "ptracingtest" persistent
+ ring buffer, e.g.
+
+ reserve_mem=20M:2M:trace trace_instance=ptracingtest@trace panic=1
+
+ And after the 1st boot, run the following commands:
+
+ cd /sys/kernel/tracing/instances/ptracingtest
+ echo 1 > events/enable
+ echo 1 > tracing_on
+ sleep 3
+ echo c > /proc/sysrq-trigger
+
+ After the panic message, the kernel will reboot and will show
+ the test results in the console output.
+
+ Note that events for the test ring buffer needs to be enabled
+ prior to crashing the kernel so that the ring buffer has content
+ that the test will corrupt.
+ As the test will corrupt events in the "ptracingtest" persistent
+ ring buffer, it should not be used for any other purpose other
+ than this test.
+
+ If unsure, say N
+
config MMIOTRACE_TEST
tristate "Test module for mmiotrace"
depends on MMIOTRACE && m
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dca27ed6a3a1..ce645ca8425d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -64,6 +64,10 @@ struct ring_buffer_cpu_meta {
unsigned long commit_buffer;
__u32 subbuf_size;
__u32 nr_subbufs;
+#ifdef CONFIG_RING_BUFFER_PERSISTENT_INJECT
+ __u32 nr_invalid;
+ __u32 entry_bytes;
+#endif
int buffers[];
};
@@ -2094,6 +2098,21 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
if (discarded)
pr_cont(" (%d pages discarded)", discarded);
pr_cont("\n");
+
+#ifdef CONFIG_RING_BUFFER_PERSISTENT_INJECT
+ if (meta->nr_invalid)
+ pr_warn("Ring buffer testing [%d] invalid pages: %s (%d/%d)\n",
+ cpu_buffer->cpu,
+ (discarded == meta->nr_invalid) ? "PASSED" : "FAILED",
+ discarded, meta->nr_invalid);
+ if (meta->entry_bytes)
+ pr_warn("Ring buffer testing [%d] entry_bytes: %s (%ld/%ld)\n",
+ cpu_buffer->cpu,
+ (entry_bytes == meta->entry_bytes) ? "PASSED" : "FAILED",
+ (long)entry_bytes, (long)meta->entry_bytes);
+ meta->nr_invalid = 0;
+ meta->entry_bytes = 0;
+#endif
return;
invalid:
@@ -2574,12 +2593,72 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
kfree(cpu_buffer);
}
+#ifdef CONFIG_RING_BUFFER_PERSISTENT_INJECT
+static void rb_test_inject_invalid_pages(struct trace_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_cpu_meta *meta;
+ struct buffer_data_page *dpage;
+ unsigned long entry_bytes = 0;
+ unsigned long ptr;
+ int subbuf_size;
+ int invalid = 0;
+ int cpu;
+ int i;
+
+ if (!(buffer->flags & RB_FL_TESTING))
+ return;
+
+ guard(preempt)();
+ cpu = smp_processor_id();
+
+ cpu_buffer = buffer->buffers[cpu];
+ if (!cpu_buffer)
+ return;
+ meta = cpu_buffer->ring_meta;
+ if (!meta)
+ return;
+
+ ptr = (unsigned long)rb_subbufs_from_meta(meta);
+ subbuf_size = meta->subbuf_size;
+
+ for (i = 0; i < meta->nr_subbufs; i++) {
+ unsigned long idx = meta->buffers[i];
+
+ dpage = (void *)(ptr + idx * subbuf_size);
+ /* Skip unused pages */
+ if (!local_read(&dpage->commit))
+ continue;
+
+ /*
+ * Invalidate even pages or multiples of 5. This will cause 3
+ * contiguous invalidated(empty) pages.
+ */
+ if (!(i & 0x1) || !(i % 5)) {
+ local_add(subbuf_size + 1, &dpage->commit);
+ invalid++;
+ } else {
+ /* Count total commit bytes. */
+ entry_bytes += local_read(&dpage->commit) & ~RB_MISSED_MASK;
+ }
+ }
+
+ pr_info("Inject invalidated %d pages on CPU%d, total size: %ld\n",
+ invalid, cpu, (long)entry_bytes);
+ meta->nr_invalid = invalid;
+ meta->entry_bytes = entry_bytes;
+}
+#else /* !CONFIG_RING_BUFFER_PERSISTENT_INJECT */
+#define rb_test_inject_invalid_pages(buffer) do { } while (0)
+#endif
+
/* Stop recording on a persistent buffer and flush cache if needed. */
static int rb_flush_buffer_cb(struct notifier_block *nb, unsigned long event, void *data)
{
struct trace_buffer *buffer = container_of(nb, struct trace_buffer, flush_nb);
ring_buffer_record_off(buffer);
+ rb_test_inject_invalid_pages(buffer);
arch_ring_buffer_flush_range(buffer->range_addr_start, buffer->range_addr_end);
return NOTIFY_DONE;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6eb4d3097a4d..4573f65d68ce 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8383,6 +8383,8 @@ static void setup_trace_scratch(struct trace_array *tr,
memset(tscratch, 0, size);
}
+#define TRACE_TEST_PTRACING_NAME "ptracingtest"
+
int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
{
enum ring_buffer_flags rb_flags;
@@ -8394,6 +8396,8 @@ int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int
buf->tr = tr;
if (tr->range_addr_start && tr->range_addr_size) {
+ if (tr->name && !strcmp(tr->name, TRACE_TEST_PTRACING_NAME))
+ rb_flags |= RB_FL_TESTING;
/* Add scratch buffer to handle 128 modules */
buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
tr->range_addr_start,
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v20 04/10] ring-buffer: Show commit numbers in buffer_meta file
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
` (2 preceding siblings ...)
2026-05-20 18:49 ` [PATCH v20 03/10] ring-buffer: Add persistent ring buffer invalid-page inject test Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-20 18:49 ` [PATCH v20 05/10] ring-buffer: Cleanup persistent ring buffer validation Steven Rostedt
` (6 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
In addition to the index number, show the commit numbers of
each data page in the per_cpu buffer_meta file.
This is useful for understanding the current status of the
persistent ring buffer. (Note that this file is shown
only for persistent ring buffer and its backup instance)
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ce645ca8425d..695398d72fbb 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2224,6 +2224,7 @@ static int rbm_show(struct seq_file *m, void *v)
struct ring_buffer_per_cpu *cpu_buffer = m->private;
struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
unsigned long val = (unsigned long)v;
+ struct buffer_data_page *dpage;
if (val == 1) {
seq_printf(m, "head_buffer: %d\n",
@@ -2236,7 +2237,9 @@ static int rbm_show(struct seq_file *m, void *v)
}
val -= 2;
- seq_printf(m, "buffer[%ld]: %d\n", val, meta->buffers[val]);
+ dpage = rb_range_buffer(cpu_buffer, val);
+ seq_printf(m, "buffer[%ld]: %d (commit: %ld)\n",
+ val, meta->buffers[val], dpage ? local_read(&dpage->commit) : -1);
return 0;
}
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v20 05/10] ring-buffer: Cleanup persistent ring buffer validation
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
` (3 preceding siblings ...)
2026-05-20 18:49 ` [PATCH v20 04/10] ring-buffer: Show commit numbers in buffer_meta file Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-20 18:49 ` [PATCH v20 06/10] ring-buffer: Cleanup buffer_data_page related code Steven Rostedt
` (5 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cleanup rb_meta_validate_events() function to make it easier to read.
This includes the following cleanups:
- Introduce rb_validatation_state to hold working variables in
validation.
- Move repleated validation state updates into rb_validate_buffer().
- Move reader_page injection code outside of rb_meta_validate_events().
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 198 ++++++++++++++++++++-----------------
1 file changed, 107 insertions(+), 91 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 695398d72fbb..3f1dd75ba332 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1883,8 +1883,16 @@ static int rb_read_data_buffer(struct buffer_data_page *dpage, int tail, int cpu
return events;
}
-static int rb_validate_buffer(struct buffer_page *bpage, int cpu,
- struct ring_buffer_cpu_meta *meta, u64 prev_ts, u64 next_ts)
+struct rb_validation_state {
+ unsigned long entries;
+ unsigned long entry_bytes;
+ int discarded;
+ u64 ts;
+};
+
+static int __rb_validate_buffer(struct buffer_page *bpage, int cpu,
+ struct ring_buffer_cpu_meta *meta,
+ u64 prev_ts, u64 next_ts)
{
struct buffer_data_page *dpage = bpage->page;
unsigned long long ts;
@@ -1922,16 +1930,94 @@ static int rb_validate_buffer(struct buffer_page *bpage, int cpu,
return ret;
}
+/**
+ * rb_validate_buffer - validates a single buffer page and updates the state.
+ * @bpage: buffer page to validate
+ * @cpu_buffer: cpu_buffer this page belongs to
+ * @meta: meta of the cpu_buffer
+ * @state: validation state
+ * @prev_ts: previous buffer's timestamp (optional)
+ * @next_ts: next buffer's timestamp (optional)
+ *
+ * If the page is invalid (wrong event length or timestamp), it increments the
+ * discarded counter and warns it. Otherwise, it updates the validation state.
+ */
+static void rb_validate_buffer(struct buffer_page *bpage,
+ struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_cpu_meta *meta,
+ struct rb_validation_state *state,
+ u64 prev_ts, u64 next_ts)
+{
+ int ret;
+
+ ret = __rb_validate_buffer(bpage, cpu_buffer->cpu, meta, prev_ts, next_ts);
+ if (ret < 0) {
+ if (!state->discarded)
+ pr_info("Ring buffer meta [%d] invalid buffer page detected\n",
+ cpu_buffer->cpu);
+ state->discarded++;
+ } else {
+ /* If the buffer has content, update pages_touched */
+ if (ret)
+ local_inc(&cpu_buffer->pages_touched);
+
+ state->entries += ret;
+ state->entry_bytes += rb_page_size(bpage);
+ state->ts = bpage->page->time_stamp;
+ }
+}
+
+static void rb_meta_inject_reader_page(struct ring_buffer_per_cpu *cpu_buffer,
+ struct ring_buffer_cpu_meta *meta,
+ struct buffer_page *orig_head,
+ struct buffer_page *head_page)
+{
+ struct buffer_page *bpage = orig_head;
+ int i;
+
+ rb_dec_page(&bpage);
+ /*
+ * Insert the reader_page before the original head page.
+ * Since the list encode RB_PAGE flags, general list
+ * operations should be avoided.
+ */
+ cpu_buffer->reader_page->list.next = &orig_head->list;
+ cpu_buffer->reader_page->list.prev = orig_head->list.prev;
+ orig_head->list.prev = &cpu_buffer->reader_page->list;
+ bpage->list.next = &cpu_buffer->reader_page->list;
+
+ /* Make the head_page the reader page */
+ cpu_buffer->reader_page = head_page;
+ bpage = head_page;
+ rb_inc_page(&head_page);
+ head_page->list.prev = bpage->list.prev;
+ rb_dec_page(&bpage);
+ bpage->list.next = &head_page->list;
+ rb_set_list_to_head(&bpage->list);
+ cpu_buffer->pages = &head_page->list;
+
+ cpu_buffer->head_page = head_page;
+ meta->head_buffer = (unsigned long)head_page->page;
+
+ /* Reset all the indexes */
+ bpage = cpu_buffer->reader_page;
+ meta->buffers[0] = rb_meta_subbuf_idx(meta, bpage->page);
+ bpage->id = 0;
+
+ for (i = 1, bpage = head_page; i < meta->nr_subbufs;
+ i++, rb_inc_page(&bpage)) {
+ meta->buffers[i] = rb_meta_subbuf_idx(meta, bpage->page);
+ bpage->id = i;
+ }
+}
+
/* If the meta data has been validated, now validate the events */
static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
{
struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
struct buffer_page *head_page, *orig_head, *orig_reader;
- unsigned long entry_bytes = 0;
- unsigned long entries = 0;
- int discarded = 0;
+ struct rb_validation_state state = { 0 };
int ret;
- u64 ts;
int i;
if (!meta || !meta->head_buffer)
@@ -1941,25 +2027,16 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
orig_reader = cpu_buffer->reader_page;
/* Do the head page first */
- ret = rb_validate_buffer(head_page, cpu_buffer->cpu, meta, 0, 0);
+ ret = __rb_validate_buffer(head_page, cpu_buffer->cpu, meta, 0, 0);
if (ret < 0) {
pr_info("Ring buffer meta [%d] invalid head page detected\n",
cpu_buffer->cpu);
goto skip_rewind;
}
- ts = head_page->page->time_stamp;
+ state.ts = head_page->page->time_stamp;
/* Do the reader page - reader must be previous to head. */
- ret = rb_validate_buffer(orig_reader, cpu_buffer->cpu, meta, 0, ts);
- if (ret < 0) {
- pr_info("Ring buffer meta [%d] invalid reader page detected\n",
- cpu_buffer->cpu);
- discarded++;
- } else {
- entries += ret;
- entry_bytes += rb_page_size(orig_reader);
- ts = orig_reader->page->time_stamp;
- }
+ rb_validate_buffer(orig_reader, cpu_buffer, meta, &state, 0, state.ts);
/*
* Try to rewind the head so that we can read the pages which are already
@@ -1983,19 +2060,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
* Skip if the page is invalid, or its timestamp is newer than the
* previous valid page.
*/
- ret = rb_validate_buffer(head_page, cpu_buffer->cpu, meta, 0, ts);
- if (ret < 0) {
- if (!discarded)
- pr_info("Ring buffer meta [%d] invalid buffer page detected\n",
- cpu_buffer->cpu);
- discarded++;
- } else {
- entries += ret;
- entry_bytes += rb_page_size(head_page);
- if (ret > 0)
- local_inc(&cpu_buffer->pages_touched);
- ts = head_page->page->time_stamp;
- }
+ rb_validate_buffer(head_page, cpu_buffer, meta, &state, 0, state.ts);
}
if (i)
pr_info("Ring buffer [%d] rewound %d pages\n", cpu_buffer->cpu, i);
@@ -2009,43 +2074,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
* into the location just before the original head page.
*/
if (head_page != orig_head) {
- struct buffer_page *bpage = orig_head;
-
- rb_dec_page(&bpage);
- /*
- * Insert the reader_page before the original head page.
- * Since the list encode RB_PAGE flags, general list
- * operations should be avoided.
- */
- cpu_buffer->reader_page->list.next = &orig_head->list;
- cpu_buffer->reader_page->list.prev = orig_head->list.prev;
- orig_head->list.prev = &cpu_buffer->reader_page->list;
- bpage->list.next = &cpu_buffer->reader_page->list;
-
- /* Make the head_page the reader page */
- cpu_buffer->reader_page = head_page;
- bpage = head_page;
- rb_inc_page(&head_page);
- head_page->list.prev = bpage->list.prev;
- rb_dec_page(&bpage);
- bpage->list.next = &head_page->list;
- rb_set_list_to_head(&bpage->list);
- cpu_buffer->pages = &head_page->list;
-
- cpu_buffer->head_page = head_page;
- meta->head_buffer = (unsigned long)head_page->page;
-
- /* Reset all the indexes */
- bpage = cpu_buffer->reader_page;
- meta->buffers[0] = rb_meta_subbuf_idx(meta, bpage->page);
- bpage->id = 0;
-
- for (i = 1, bpage = head_page; i < meta->nr_subbufs;
- i++, rb_inc_page(&bpage)) {
- meta->buffers[i] = rb_meta_subbuf_idx(meta, bpage->page);
- bpage->id = i;
- }
-
+ rb_meta_inject_reader_page(cpu_buffer, meta, orig_head, head_page);
/* We'll restart verifying from orig_head */
head_page = orig_head;
}
@@ -2057,7 +2086,7 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
/* Nothing more to do, the only page is the reader page */
goto done;
}
- ts = head_page->page->time_stamp;
+ state.ts = head_page->page->time_stamp;
/* Iterate until finding the commit page */
for (i = 0; i < meta->nr_subbufs + 1; i++, rb_inc_page(&head_page)) {
@@ -2066,21 +2095,8 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
if (head_page == orig_reader)
continue;
- ret = rb_validate_buffer(head_page, cpu_buffer->cpu, meta, ts, 0);
- if (ret < 0) {
- if (!discarded)
- pr_info("Ring buffer meta [%d] invalid buffer page detected\n",
- cpu_buffer->cpu);
- discarded++;
- } else {
- /* If the buffer has content, update pages_touched */
- if (ret)
- local_inc(&cpu_buffer->pages_touched);
+ rb_validate_buffer(head_page, cpu_buffer, meta, &state, state.ts, 0);
- entries += ret;
- entry_bytes += rb_page_size(head_page);
- ts = head_page->page->time_stamp;
- }
if (head_page == cpu_buffer->commit_page)
break;
}
@@ -2091,25 +2107,25 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
goto invalid;
}
done:
- local_set(&cpu_buffer->entries, entries);
- local_set(&cpu_buffer->entries_bytes, entry_bytes);
+ local_set(&cpu_buffer->entries, state.entries);
+ local_set(&cpu_buffer->entries_bytes, state.entry_bytes);
pr_info("Ring buffer meta [%d] is from previous boot!", cpu_buffer->cpu);
- if (discarded)
- pr_cont(" (%d pages discarded)", discarded);
+ if (state.discarded)
+ pr_cont(" (%d pages discarded)", state.discarded);
pr_cont("\n");
#ifdef CONFIG_RING_BUFFER_PERSISTENT_INJECT
if (meta->nr_invalid)
pr_warn("Ring buffer testing [%d] invalid pages: %s (%d/%d)\n",
cpu_buffer->cpu,
- (discarded == meta->nr_invalid) ? "PASSED" : "FAILED",
- discarded, meta->nr_invalid);
+ (state.discarded == meta->nr_invalid) ? "PASSED" : "FAILED",
+ state.discarded, meta->nr_invalid);
if (meta->entry_bytes)
pr_warn("Ring buffer testing [%d] entry_bytes: %s (%ld/%ld)\n",
cpu_buffer->cpu,
- (entry_bytes == meta->entry_bytes) ? "PASSED" : "FAILED",
- (long)entry_bytes, (long)meta->entry_bytes);
+ (state.entry_bytes == meta->entry_bytes) ? "PASSED" : "FAILED",
+ (long)state.entry_bytes, (long)meta->entry_bytes);
meta->nr_invalid = 0;
meta->entry_bytes = 0;
#endif
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v20 06/10] ring-buffer: Cleanup buffer_data_page related code
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
` (4 preceding siblings ...)
2026-05-20 18:49 ` [PATCH v20 05/10] ring-buffer: Cleanup persistent ring buffer validation Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-20 18:49 ` [PATCH v20 07/10] ring-buffer: Skip invalid sub-buffers for iterator Steven Rostedt
` (4 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Code cleanup related to buffer_data_page for readability,
which includes:
- Introduce rb_data_page_commit() and rb_data_page_size()
- Use 'dpage' for buffer_data_page, instead of 'bpage' because
'bpage' is used for buffer_page.
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 112 ++++++++++++++++++++-----------------
1 file changed, 60 insertions(+), 52 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3f1dd75ba332..c6c2f92bfc24 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -364,21 +364,30 @@ struct buffer_page {
#define RB_WRITE_MASK 0xfffff
#define RB_WRITE_INTCNT (1 << 20)
-static void rb_init_page(struct buffer_data_page *bpage)
+static void rb_init_data_page(struct buffer_data_page *bpage)
{
local_set(&bpage->commit, 0);
bpage->time_stamp = 0;
}
+static __always_inline long rb_data_page_commit(struct buffer_data_page *dpage)
+{
+ return local_read(&dpage->commit);
+}
+
+static __always_inline long rb_data_page_size(struct buffer_data_page *dpage)
+{
+ return rb_data_page_commit(dpage) & ~RB_MISSED_MASK;
+}
+
static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
{
- return local_read(&bpage->page->commit);
+ return rb_data_page_commit(bpage->page);
}
-/* Size is determined by what has been committed */
static __always_inline unsigned int rb_page_size(struct buffer_page *bpage)
{
- return rb_page_commit(bpage) & ~RB_MISSED_MASK;
+ return rb_data_page_size(bpage->page);
}
static void free_buffer_page(struct buffer_page *bpage)
@@ -419,7 +428,7 @@ static struct buffer_data_page *alloc_cpu_data(int cpu, int order)
return NULL;
dpage = page_address(page);
- rb_init_page(dpage);
+ rb_init_data_page(dpage);
return dpage;
}
@@ -659,7 +668,7 @@ static void verify_event(struct ring_buffer_per_cpu *cpu_buffer,
do {
if (page == tail_page || WARN_ON_ONCE(stop++ > 100))
done = true;
- commit = local_read(&page->page->commit);
+ commit = rb_page_commit(page);
write = local_read(&page->write);
if (addr >= (unsigned long)&page->page->data[commit] &&
addr < (unsigned long)&page->page->data[write])
@@ -1906,7 +1915,7 @@ static int __rb_validate_buffer(struct buffer_page *bpage, int cpu,
* Even after clearing these bits, a commit value greater than the
* subbuf_size is considered invalid.
*/
- tail = local_read(&dpage->commit) & ~RB_MISSED_MASK;
+ tail = rb_data_page_size(dpage);
if (tail <= meta->subbuf_size - BUF_PAGE_HDR_SIZE)
ret = rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
else
@@ -2138,12 +2147,12 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
/* Reset the reader page */
local_set(&cpu_buffer->reader_page->entries, 0);
- rb_init_page(cpu_buffer->reader_page->page);
+ rb_init_data_page(cpu_buffer->reader_page->page);
/* Reset all the subbuffers */
for (i = 0; i < meta->nr_subbufs - 1; i++, rb_inc_page(&head_page)) {
local_set(&head_page->entries, 0);
- rb_init_page(head_page->page);
+ rb_init_data_page(head_page->page);
}
}
@@ -2203,7 +2212,7 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages, int sc
*/
for (i = 0; i < meta->nr_subbufs; i++) {
meta->buffers[i] = i;
- rb_init_page(subbuf);
+ rb_init_data_page(subbuf);
subbuf += meta->subbuf_size;
}
}
@@ -2255,7 +2264,7 @@ static int rbm_show(struct seq_file *m, void *v)
val -= 2;
dpage = rb_range_buffer(cpu_buffer, val);
seq_printf(m, "buffer[%ld]: %d (commit: %ld)\n",
- val, meta->buffers[val], dpage ? local_read(&dpage->commit) : -1);
+ val, meta->buffers[val], dpage ? rb_data_page_commit(dpage) : -1);
return 0;
}
@@ -2646,7 +2655,7 @@ static void rb_test_inject_invalid_pages(struct trace_buffer *buffer)
dpage = (void *)(ptr + idx * subbuf_size);
/* Skip unused pages */
- if (!local_read(&dpage->commit))
+ if (!rb_data_page_commit(dpage))
continue;
/*
@@ -2658,7 +2667,7 @@ static void rb_test_inject_invalid_pages(struct trace_buffer *buffer)
invalid++;
} else {
/* Count total commit bytes. */
- entry_bytes += local_read(&dpage->commit) & ~RB_MISSED_MASK;
+ entry_bytes += rb_data_page_size(dpage);
}
}
@@ -4187,8 +4196,7 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
local_set(&cpu_buffer->commit_page->page->commit,
rb_page_write(cpu_buffer->commit_page));
RB_WARN_ON(cpu_buffer,
- local_read(&cpu_buffer->commit_page->page->commit) &
- ~RB_WRITE_MASK);
+ rb_page_commit(cpu_buffer->commit_page) & ~RB_WRITE_MASK);
barrier();
}
@@ -4560,7 +4568,7 @@ static const char *show_interrupt_level(void)
return show_irq_str(level);
}
-static void dump_buffer_page(struct buffer_data_page *bpage,
+static void dump_buffer_page(struct buffer_data_page *dpage,
struct rb_event_info *info,
unsigned long tail)
{
@@ -4568,12 +4576,12 @@ static void dump_buffer_page(struct buffer_data_page *bpage,
u64 ts, delta;
int e;
- ts = bpage->time_stamp;
+ ts = dpage->time_stamp;
pr_warn(" [%lld] PAGE TIME STAMP\n", ts);
for (e = 0; e < tail; e += rb_event_length(event)) {
- event = (struct ring_buffer_event *)(bpage->data + e);
+ event = (struct ring_buffer_event *)(dpage->data + e);
switch (event->type_len) {
@@ -4623,7 +4631,7 @@ static atomic_t ts_dump;
} \
atomic_inc(&cpu_buffer->record_disabled); \
pr_warn(fmt, ##__VA_ARGS__); \
- dump_buffer_page(bpage, info, tail); \
+ dump_buffer_page(dpage, info, tail); \
atomic_dec(&ts_dump); \
/* There's some cases in boot up that this can happen */ \
if (WARN_ON_ONCE(system_state != SYSTEM_BOOTING)) \
@@ -4639,16 +4647,16 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
struct rb_event_info *info,
unsigned long tail)
{
- struct buffer_data_page *bpage;
+ struct buffer_data_page *dpage;
u64 ts, delta;
bool full = false;
int ret;
- bpage = info->tail_page->page;
+ dpage = info->tail_page->page;
if (tail == CHECK_FULL_PAGE) {
full = true;
- tail = local_read(&bpage->commit);
+ tail = rb_data_page_commit(dpage);
} else if (info->add_timestamp &
(RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE)) {
/* Ignore events with absolute time stamps */
@@ -4659,7 +4667,7 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
* Do not check the first event (skip possible extends too).
* Also do not check if previous events have not been committed.
*/
- if (tail <= 8 || tail > local_read(&bpage->commit))
+ if (tail <= 8 || tail > rb_data_page_commit(dpage))
return;
/*
@@ -4668,7 +4676,7 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
if (atomic_inc_return(this_cpu_ptr(&checking)) != 1)
goto out;
- ret = rb_read_data_buffer(bpage, tail, cpu_buffer->cpu, &ts, &delta);
+ ret = rb_read_data_buffer(dpage, tail, cpu_buffer->cpu, &ts, &delta);
if (ret < 0) {
if (delta < ts) {
buffer_warn_return("[CPU: %d]ABSOLUTE TIME WENT BACKWARDS: last ts: %lld absolute ts: %lld clock:%pS\n",
@@ -6456,7 +6464,7 @@ static void rb_clear_buffer_page(struct buffer_page *page)
{
local_set(&page->write, 0);
local_set(&page->entries, 0);
- rb_init_page(page->page);
+ rb_init_data_page(page->page);
page->read = 0;
}
@@ -6941,7 +6949,7 @@ ring_buffer_alloc_read_page(struct trace_buffer *buffer, int cpu)
local_irq_restore(flags);
if (bpage->data) {
- rb_init_page(bpage->data);
+ rb_init_data_page(bpage->data);
} else {
bpage->data = alloc_cpu_data(cpu, cpu_buffer->buffer->subbuf_order);
if (!bpage->data) {
@@ -6966,8 +6974,8 @@ void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu,
struct buffer_data_read_page *data_page)
{
struct ring_buffer_per_cpu *cpu_buffer;
- struct buffer_data_page *bpage = data_page->data;
- struct page *page = virt_to_page(bpage);
+ struct buffer_data_page *dpage = data_page->data;
+ struct page *page = virt_to_page(dpage);
unsigned long flags;
if (!buffer || !buffer->buffers || !buffer->buffers[cpu])
@@ -6987,15 +6995,15 @@ void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu,
arch_spin_lock(&cpu_buffer->lock);
if (!cpu_buffer->free_page) {
- cpu_buffer->free_page = bpage;
- bpage = NULL;
+ cpu_buffer->free_page = dpage;
+ dpage = NULL;
}
arch_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
out:
- free_pages((unsigned long)bpage, data_page->order);
+ free_pages((unsigned long)dpage, data_page->order);
kfree(data_page);
}
EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
@@ -7040,7 +7048,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
{
struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
struct ring_buffer_event *event;
- struct buffer_data_page *bpage;
+ struct buffer_data_page *dpage;
struct buffer_page *reader;
unsigned long missed_events;
unsigned int commit;
@@ -7066,8 +7074,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
if (data_page->order != buffer->subbuf_order)
return -1;
- bpage = data_page->data;
- if (!bpage)
+ dpage = data_page->data;
+ if (!dpage)
return -1;
guard(raw_spinlock_irqsave)(&cpu_buffer->reader_lock);
@@ -7133,7 +7141,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
* We have already ensured there's enough space if this
* is a time extend. */
size = rb_event_length(event);
- memcpy(bpage->data + pos, rpage->data + rpos, size);
+ memcpy(dpage->data + pos, rpage->data + rpos, size);
len -= size;
@@ -7149,9 +7157,9 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
size = rb_event_ts_length(event);
} while (len >= size);
- /* update bpage */
- local_set(&bpage->commit, pos);
- bpage->time_stamp = save_timestamp;
+ /* update dpage */
+ local_set(&dpage->commit, pos);
+ dpage->time_stamp = save_timestamp;
/* we copied everything to the beginning */
read = 0;
@@ -7161,13 +7169,13 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
cpu_buffer->read_bytes += rb_page_size(reader);
/* swap the pages */
- rb_init_page(bpage);
- bpage = reader->page;
+ rb_init_data_page(dpage);
+ dpage = reader->page;
reader->page = data_page->data;
local_set(&reader->write, 0);
local_set(&reader->entries, 0);
reader->read = 0;
- data_page->data = bpage;
+ data_page->data = dpage;
/*
* Use the real_end for the data size,
@@ -7175,12 +7183,12 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
* on the page.
*/
if (reader->real_end)
- local_set(&bpage->commit, reader->real_end);
+ local_set(&dpage->commit, reader->real_end);
}
cpu_buffer->lost_events = 0;
- commit = local_read(&bpage->commit);
+ commit = rb_data_page_commit(dpage);
/*
* Set a flag in the commit field if we lost events
*/
@@ -7189,19 +7197,19 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
* missed events, then record it there.
*/
if (buffer->subbuf_size - commit >= sizeof(missed_events)) {
- memcpy(&bpage->data[commit], &missed_events,
+ memcpy(&dpage->data[commit], &missed_events,
sizeof(missed_events));
- local_add(RB_MISSED_STORED, &bpage->commit);
+ local_add(RB_MISSED_STORED, &dpage->commit);
commit += sizeof(missed_events);
}
- local_add(RB_MISSED_EVENTS, &bpage->commit);
+ local_add(RB_MISSED_EVENTS, &dpage->commit);
}
/*
* This page may be off to user land. Zero it out here.
*/
if (commit < buffer->subbuf_size)
- memset(&bpage->data[commit], 0, buffer->subbuf_size - commit);
+ memset(&dpage->data[commit], 0, buffer->subbuf_size - commit);
return read;
}
@@ -7832,7 +7840,7 @@ int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu)
if (missed_events) {
if (cpu_buffer->reader_page != cpu_buffer->commit_page) {
- struct buffer_data_page *bpage = reader->page;
+ struct buffer_data_page *dpage = reader->page;
unsigned int commit;
/*
* Use the real_end for the data size,
@@ -7840,18 +7848,18 @@ int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu)
* on the page.
*/
if (reader->real_end)
- local_set(&bpage->commit, reader->real_end);
+ local_set(&dpage->commit, reader->real_end);
/*
* If there is room at the end of the page to save the
* missed events, then record it there.
*/
commit = rb_page_size(reader);
if (buffer->subbuf_size - commit >= sizeof(missed_events)) {
- memcpy(&bpage->data[commit], &missed_events,
+ memcpy(&dpage->data[commit], &missed_events,
sizeof(missed_events));
- local_add(RB_MISSED_STORED, &bpage->commit);
+ local_add(RB_MISSED_STORED, &dpage->commit);
}
- local_add(RB_MISSED_EVENTS, &bpage->commit);
+ local_add(RB_MISSED_EVENTS, &dpage->commit);
} else if (!WARN_ONCE(cpu_buffer->reader_page == cpu_buffer->tail_page,
"Reader on commit with %ld missed events",
missed_events)) {
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v20 07/10] ring-buffer: Skip invalid sub-buffers for iterator
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
` (5 preceding siblings ...)
2026-05-20 18:49 ` [PATCH v20 06/10] ring-buffer: Cleanup buffer_data_page related code Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-21 2:51 ` Masami Hiramatsu
2026-05-20 18:49 ` [PATCH v20 08/10] ring-buffer: Have dropped subbuffers be persistent across reboots Steven Rostedt
` (3 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: Steven Rostedt <rostedt@goodmis.org>
On bootup if the persistent ring buffer finds an invalid sub-buffer, it
only invalidates the invalid sub-buffer and continues. Several sub-buffers
may be invalid and this can cause the iterator to loop more than 3 times
looking for a new event. If that happens, then it returns NULL. Having a
NULL return early can confuse the iterator looking for the next event, and
may show events out of order.
Have the same logic for the consuming read for the iterator that will
allow the loop to find the next event to happen the number of sub-buffers
and not just 3.
Fixes: **TBD** ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c6c2f92bfc24..bda53a2d2159 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -6103,12 +6103,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
int nr_loops = 0;
+ int max_loops;
if (ts)
*ts = 0;
cpu_buffer = iter->cpu_buffer;
buffer = cpu_buffer->buffer;
+ max_loops = cpu_buffer->ring_meta ? cpu_buffer->nr_pages : 3;
/*
* Check if someone performed a consuming read to the buffer
@@ -6131,7 +6133,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
* the ring buffer with an active write as the consumer is.
* Do not warn if the three failures is reached.
*/
- if (++nr_loops > 3)
+ if (++nr_loops > max_loops)
return NULL;
if (rb_per_cpu_empty(cpu_buffer))
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v20 07/10] ring-buffer: Skip invalid sub-buffers for iterator
2026-05-20 18:49 ` [PATCH v20 07/10] ring-buffer: Skip invalid sub-buffers for iterator Steven Rostedt
@ 2026-05-21 2:51 ` Masami Hiramatsu
2026-05-21 2:58 ` Steven Rostedt
0 siblings, 1 reply; 22+ messages in thread
From: Masami Hiramatsu @ 2026-05-21 2:51 UTC (permalink / raw)
To: Steven Rostedt
Cc: linux-kernel, linux-trace-kernel, Masami Hiramatsu,
Mathieu Desnoyers, Catalin Marinas, Will Deacon, Ian Rogers
On Wed, 20 May 2026 14:49:45 -0400
Steven Rostedt <rostedt@kernel.org> wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
>
> On bootup if the persistent ring buffer finds an invalid sub-buffer, it
> only invalidates the invalid sub-buffer and continues. Several sub-buffers
> may be invalid and this can cause the iterator to loop more than 3 times
> looking for a new event. If that happens, then it returns NULL. Having a
> NULL return early can confuse the iterator looking for the next event, and
> may show events out of order.
>
> Have the same logic for the consuming read for the iterator that will
> allow the loop to find the next event to happen the number of sub-buffers
> and not just 3.
>
> Fixes: **TBD** ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Should we merge this into the original one?
Anyway, this looks good to me.
Thanks,
> ---
> kernel/trace/ring_buffer.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index c6c2f92bfc24..bda53a2d2159 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -6103,12 +6103,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
> struct ring_buffer_per_cpu *cpu_buffer;
> struct ring_buffer_event *event;
> int nr_loops = 0;
> + int max_loops;
>
> if (ts)
> *ts = 0;
>
> cpu_buffer = iter->cpu_buffer;
> buffer = cpu_buffer->buffer;
> + max_loops = cpu_buffer->ring_meta ? cpu_buffer->nr_pages : 3;
>
> /*
> * Check if someone performed a consuming read to the buffer
> @@ -6131,7 +6133,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
> * the ring buffer with an active write as the consumer is.
> * Do not warn if the three failures is reached.
> */
> - if (++nr_loops > 3)
> + if (++nr_loops > max_loops)
> return NULL;
>
> if (rb_per_cpu_empty(cpu_buffer))
> --
> 2.53.0
>
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v20 07/10] ring-buffer: Skip invalid sub-buffers for iterator
2026-05-21 2:51 ` Masami Hiramatsu
@ 2026-05-21 2:58 ` Steven Rostedt
0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-21 2:58 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: linux-kernel, linux-trace-kernel, Mathieu Desnoyers,
Catalin Marinas, Will Deacon, Ian Rogers
On Thu, 21 May 2026 11:51:25 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> > Fixes: **TBD** ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer
> > Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
>
> Should we merge this into the original one?
>
> Anyway, this looks good to me.
I could. I never know how to do this really. That is, I always feel
uncomfortable modifying someone else's patch. But I could add:
[ SDR: Added skip of invalid sub-buffer for iterator case ]
Above my SOB.
-- Steve
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v20 08/10] ring-buffer: Have dropped subbuffers be persistent across reboots
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
` (6 preceding siblings ...)
2026-05-20 18:49 ` [PATCH v20 07/10] ring-buffer: Skip invalid sub-buffers for iterator Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-21 6:29 ` Masami Hiramatsu
2026-05-20 18:49 ` [PATCH v20 09/10] ring-buffer: Show persistent buffer dropped events in trace file Steven Rostedt
` (2 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: Steven Rostedt <rostedt@goodmis.org>
When the persistent ring buffer detects a corrupted subbuffer, it will
zero its size and report dropped pages in the dmesg, then it continues
normally.
But if a reboot happens without clearing or restarting tracing on the
persistent ring buffer, the next boot will show no pages are dropped.
If the persistent ring buffer is still the same, then it should still
report dropped pages so the user knows that the buffer has missing events.
Add the RB_MISSED_EVENTS flag to the commit value of the subbuffer so that
the next boot will still show that pages were dropped.
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bda53a2d2159..ae5c645b59c9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1915,7 +1915,7 @@ static int __rb_validate_buffer(struct buffer_page *bpage, int cpu,
* Even after clearing these bits, a commit value greater than the
* subbuf_size is considered invalid.
*/
- tail = rb_data_page_size(dpage);
+ tail = rb_data_page_commit(dpage);
if (tail <= meta->subbuf_size - BUF_PAGE_HDR_SIZE)
ret = rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
else
@@ -1929,7 +1929,7 @@ static int __rb_validate_buffer(struct buffer_page *bpage, int cpu,
*/
if (ret < 0 || (prev_ts && prev_ts > ts) || (next_ts && ts > next_ts)) {
local_set(&bpage->entries, 0);
- local_set(&dpage->commit, 0);
+ local_set(&dpage->commit, RB_MISSED_EVENTS);
dpage->time_stamp = prev_ts ? prev_ts : next_ts;
ret = -1;
} else {
@@ -3444,7 +3444,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
* is a mb(), which will synchronize with the rmb here.
* (see rb_tail_page_update() and __rb_reserve_next())
*/
- commit = rb_page_commit(iter_head_page);
+ commit = rb_page_size(iter_head_page);
smp_rmb();
/* An event needs to be at least 8 bytes in size */
@@ -3473,7 +3473,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
/* Make sure the page didn't change since we read this */
if (iter->page_stamp != iter_head_page->page->time_stamp ||
- commit > rb_page_commit(iter_head_page))
+ commit > rb_page_size(iter_head_page))
goto reset;
iter->next_event = iter->head + length;
@@ -5643,7 +5643,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
* (see rb_tail_page_update())
*/
smp_rmb();
- commit = rb_page_commit(commit_page);
+ commit = rb_page_size(commit_page);
/* We want to make sure that the commit page doesn't change */
smp_rmb();
@@ -5836,6 +5836,7 @@ __rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
*/
local_set(&cpu_buffer->reader_page->write, 0);
local_set(&cpu_buffer->reader_page->entries, 0);
+ rb_init_data_page(cpu_buffer->reader_page->page);
cpu_buffer->reader_page->real_end = 0;
spin:
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v20 08/10] ring-buffer: Have dropped subbuffers be persistent across reboots
2026-05-20 18:49 ` [PATCH v20 08/10] ring-buffer: Have dropped subbuffers be persistent across reboots Steven Rostedt
@ 2026-05-21 6:29 ` Masami Hiramatsu
0 siblings, 0 replies; 22+ messages in thread
From: Masami Hiramatsu @ 2026-05-21 6:29 UTC (permalink / raw)
To: Steven Rostedt
Cc: linux-kernel, linux-trace-kernel, Masami Hiramatsu,
Mathieu Desnoyers, Catalin Marinas, Will Deacon, Ian Rogers
On Wed, 20 May 2026 14:49:46 -0400
Steven Rostedt <rostedt@kernel.org> wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
>
> When the persistent ring buffer detects a corrupted subbuffer, it will
> zero its size and report dropped pages in the dmesg, then it continues
> normally.
>
> But if a reboot happens without clearing or restarting tracing on the
> persistent ring buffer, the next boot will show no pages are dropped.
>
> If the persistent ring buffer is still the same, then it should still
> report dropped pages so the user knows that the buffer has missing events.
>
> Add the RB_MISSED_EVENTS flag to the commit value of the subbuffer so that
> the next boot will still show that pages were dropped.
>
Looks good to me.
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Thanks!
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
> ---
> kernel/trace/ring_buffer.c | 11 ++++++-----
> 1 file changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index bda53a2d2159..ae5c645b59c9 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -1915,7 +1915,7 @@ static int __rb_validate_buffer(struct buffer_page *bpage, int cpu,
> * Even after clearing these bits, a commit value greater than the
> * subbuf_size is considered invalid.
> */
> - tail = rb_data_page_size(dpage);
> + tail = rb_data_page_commit(dpage);
> if (tail <= meta->subbuf_size - BUF_PAGE_HDR_SIZE)
> ret = rb_read_data_buffer(dpage, tail, cpu, &ts, &delta);
> else
> @@ -1929,7 +1929,7 @@ static int __rb_validate_buffer(struct buffer_page *bpage, int cpu,
> */
> if (ret < 0 || (prev_ts && prev_ts > ts) || (next_ts && ts > next_ts)) {
> local_set(&bpage->entries, 0);
> - local_set(&dpage->commit, 0);
> + local_set(&dpage->commit, RB_MISSED_EVENTS);
> dpage->time_stamp = prev_ts ? prev_ts : next_ts;
> ret = -1;
> } else {
> @@ -3444,7 +3444,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
> * is a mb(), which will synchronize with the rmb here.
> * (see rb_tail_page_update() and __rb_reserve_next())
> */
> - commit = rb_page_commit(iter_head_page);
> + commit = rb_page_size(iter_head_page);
> smp_rmb();
>
> /* An event needs to be at least 8 bytes in size */
> @@ -3473,7 +3473,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
>
> /* Make sure the page didn't change since we read this */
> if (iter->page_stamp != iter_head_page->page->time_stamp ||
> - commit > rb_page_commit(iter_head_page))
> + commit > rb_page_size(iter_head_page))
> goto reset;
>
> iter->next_event = iter->head + length;
> @@ -5643,7 +5643,7 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
> * (see rb_tail_page_update())
> */
> smp_rmb();
> - commit = rb_page_commit(commit_page);
> + commit = rb_page_size(commit_page);
> /* We want to make sure that the commit page doesn't change */
> smp_rmb();
>
> @@ -5836,6 +5836,7 @@ __rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
> */
> local_set(&cpu_buffer->reader_page->write, 0);
> local_set(&cpu_buffer->reader_page->entries, 0);
> + rb_init_data_page(cpu_buffer->reader_page->page);
> cpu_buffer->reader_page->real_end = 0;
>
> spin:
> --
> 2.53.0
>
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v20 09/10] ring-buffer: Show persistent buffer dropped events in trace file
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
` (7 preceding siblings ...)
2026-05-20 18:49 ` [PATCH v20 08/10] ring-buffer: Have dropped subbuffers be persistent across reboots Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-21 6:34 ` Masami Hiramatsu
2026-05-20 18:49 ` [PATCH v20 10/10] ring-buffer: Show persistent buffer dropped events in trace_pipe file Steven Rostedt
2026-05-21 8:13 ` [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Masami Hiramatsu
10 siblings, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: Steven Rostedt <rostedt@goodmis.org>
When the persistent ring buffer is validated on boot up, if a subbuffer is
deemed invalid, it resets the buffer and continues. Currently, these lost
events are not shown in the trace file output.
Have the trace iterator look for subbuffers that have the RB_MISSED_EVENTS
set and set the iter->missed_events flag when it is detected. This will
then have the trace file shows "LOST EVENTS" when it reads across a
subbuffer that was corrupted and invalidated.
For example:
<...>-1016 [005] ...1. 6230.660403: preempt_disable: caller=__mod_memcg_state+0x1c8/0x200 parent=__mod_memcg_state+0x1c8/0x200
CPU:5 [LOST EVENTS]
<...>-1016 [005] ..... 6230.660673: kmem_cache_alloc: call_site=__anon_vma_prepare+0x1ad/0x1e0 ptr=000000006e40294c name=anon_vma bytes_req=200 bytes_alloc=208 gfp_flags=GFP_KERNEL node=-1 accounted=true
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ae5c645b59c9..9cdbee171cdc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3518,6 +3518,9 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
else
rb_inc_page(&iter->head_page);
+ if (rb_page_commit(iter->head_page) & RB_MISSED_EVENTS)
+ iter->missed_events = -1;
+
iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
iter->head = 0;
iter->next_event = 0;
@@ -5579,6 +5582,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
iter->head_page = cpu_buffer->reader_page;
iter->head = cpu_buffer->reader_page->read;
iter->next_event = iter->head;
+ iter->missed_events = 0;
iter->cache_reader_page = iter->head_page;
iter->cache_read = cpu_buffer->read;
@@ -7053,7 +7057,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
struct ring_buffer_event *event;
struct buffer_data_page *dpage;
struct buffer_page *reader;
- unsigned long missed_events;
+ long missed_events;
unsigned int commit;
unsigned int read;
u64 save_timestamp;
@@ -7179,6 +7183,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
local_set(&reader->entries, 0);
reader->read = 0;
data_page->data = dpage;
+ if (!missed_events && rb_data_page_commit(dpage) & RB_MISSED_EVENTS)
+ missed_events = -1;
/*
* Use the real_end for the data size,
@@ -7196,10 +7202,12 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
* Set a flag in the commit field if we lost events
*/
if (missed_events) {
- /* If there is room at the end of the page to save the
+ /*
+ * If there is room at the end of the page to save the
* missed events, then record it there.
*/
- if (buffer->subbuf_size - commit >= sizeof(missed_events)) {
+ if (missed_events > 0 &&
+ buffer->subbuf_size - commit >= sizeof(missed_events)) {
memcpy(&dpage->data[commit], &missed_events,
sizeof(missed_events));
local_add(RB_MISSED_STORED, &dpage->commit);
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v20 09/10] ring-buffer: Show persistent buffer dropped events in trace file
2026-05-20 18:49 ` [PATCH v20 09/10] ring-buffer: Show persistent buffer dropped events in trace file Steven Rostedt
@ 2026-05-21 6:34 ` Masami Hiramatsu
2026-05-21 12:14 ` Steven Rostedt
0 siblings, 1 reply; 22+ messages in thread
From: Masami Hiramatsu @ 2026-05-21 6:34 UTC (permalink / raw)
To: Steven Rostedt
Cc: linux-kernel, linux-trace-kernel, Masami Hiramatsu,
Mathieu Desnoyers, Catalin Marinas, Will Deacon, Ian Rogers
On Wed, 20 May 2026 14:49:47 -0400
Steven Rostedt <rostedt@kernel.org> wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
>
> When the persistent ring buffer is validated on boot up, if a subbuffer is
> deemed invalid, it resets the buffer and continues. Currently, these lost
> events are not shown in the trace file output.
>
> Have the trace iterator look for subbuffers that have the RB_MISSED_EVENTS
> set and set the iter->missed_events flag when it is detected. This will
> then have the trace file shows "LOST EVENTS" when it reads across a
> subbuffer that was corrupted and invalidated.
I think it is good to show the dropped events. BTW, is it better to
comment out the line, just for parser?
For example, add a '#' at the like.
# CPU:5 [LOST EVENTS]
Ah, but it is already done...
Thanks,
>
> For example:
>
> <...>-1016 [005] ...1. 6230.660403: preempt_disable: caller=__mod_memcg_state+0x1c8/0x200 parent=__mod_memcg_state+0x1c8/0x200
> CPU:5 [LOST EVENTS]
> <...>-1016 [005] ..... 6230.660673: kmem_cache_alloc: call_site=__anon_vma_prepare+0x1ad/0x1e0 ptr=000000006e40294c name=anon_vma bytes_req=200 bytes_alloc=208 gfp_flags=GFP_KERNEL node=-1 accounted=true
>
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
> ---
> kernel/trace/ring_buffer.c | 14 +++++++++++---
> 1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index ae5c645b59c9..9cdbee171cdc 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -3518,6 +3518,9 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
> else
> rb_inc_page(&iter->head_page);
>
> + if (rb_page_commit(iter->head_page) & RB_MISSED_EVENTS)
> + iter->missed_events = -1;
> +
> iter->page_stamp = iter->read_stamp = iter->head_page->page->time_stamp;
> iter->head = 0;
> iter->next_event = 0;
> @@ -5579,6 +5582,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
> iter->head_page = cpu_buffer->reader_page;
> iter->head = cpu_buffer->reader_page->read;
> iter->next_event = iter->head;
> + iter->missed_events = 0;
>
> iter->cache_reader_page = iter->head_page;
> iter->cache_read = cpu_buffer->read;
> @@ -7053,7 +7057,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
> struct ring_buffer_event *event;
> struct buffer_data_page *dpage;
> struct buffer_page *reader;
> - unsigned long missed_events;
> + long missed_events;
> unsigned int commit;
> unsigned int read;
> u64 save_timestamp;
> @@ -7179,6 +7183,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
> local_set(&reader->entries, 0);
> reader->read = 0;
> data_page->data = dpage;
> + if (!missed_events && rb_data_page_commit(dpage) & RB_MISSED_EVENTS)
> + missed_events = -1;
>
> /*
> * Use the real_end for the data size,
> @@ -7196,10 +7202,12 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
> * Set a flag in the commit field if we lost events
> */
> if (missed_events) {
> - /* If there is room at the end of the page to save the
> + /*
> + * If there is room at the end of the page to save the
> * missed events, then record it there.
> */
> - if (buffer->subbuf_size - commit >= sizeof(missed_events)) {
> + if (missed_events > 0 &&
> + buffer->subbuf_size - commit >= sizeof(missed_events)) {
> memcpy(&dpage->data[commit], &missed_events,
> sizeof(missed_events));
> local_add(RB_MISSED_STORED, &dpage->commit);
> --
> 2.53.0
>
>
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v20 09/10] ring-buffer: Show persistent buffer dropped events in trace file
2026-05-21 6:34 ` Masami Hiramatsu
@ 2026-05-21 12:14 ` Steven Rostedt
0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-21 12:14 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: linux-kernel, linux-trace-kernel, Mathieu Desnoyers,
Catalin Marinas, Will Deacon, Ian Rogers
On Thu, 21 May 2026 15:34:16 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> I think it is good to show the dropped events. BTW, is it better to
> comment out the line, just for parser?
> For example, add a '#' at the like.
>
> # CPU:5 [LOST EVENTS]
>
> Ah, but it is already done...
Yeah, can't modify that. The "[LOST EVENTS]" output has been around since
2020 and changing that now will likely break user API.
-- Steve
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v20 10/10] ring-buffer: Show persistent buffer dropped events in trace_pipe file
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
` (8 preceding siblings ...)
2026-05-20 18:49 ` [PATCH v20 09/10] ring-buffer: Show persistent buffer dropped events in trace file Steven Rostedt
@ 2026-05-20 18:49 ` Steven Rostedt
2026-05-21 8:18 ` Masami Hiramatsu
2026-05-21 8:13 ` [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Masami Hiramatsu
10 siblings, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2026-05-20 18:49 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel
Cc: Masami Hiramatsu, Mathieu Desnoyers, Catalin Marinas, Will Deacon,
Ian Rogers
From: Steven Rostedt <rostedt@goodmis.org>
When the persistent ring buffer is validated on boot up, if a subbuffer is
deemed invalid, it resets the buffer and continues. Have the code preserve
the RB_MISSED_EVENTS flag in the commit portion of the subbuffer header
and pass that back so that the trace_pipe file can show the missed events
like the trace file does.
For example:
<...>-1242 [005] d.... 4429.120116: page_fault_user: address=0x7ffaebb6e728 ip=0x7ffaeb9d4960 error_code=0x7
<...>-1242 [005] ..... 4429.120124: mm_page_alloc: page=00000000055254f3 pfn=0x1373bd order=0 migratetype=1 gfp_flags=GFP_HIGHUSER_MOVABLE|__GFP_COMP
<...>-1242 [005] d..2. 4429.120132: tlb_flush: pages:1 reason:local MM shootdown (3)
CPU:5 [LOST EVENTS]
<...>-1242 [005] d.... 4429.120661: page_fault_user: address=0x55ba7c2d0944 ip=0x55ba7c20cd02 error_code=0x7
<...>-1242 [005] ..... 4429.120669: mm_page_alloc: page=0000000005a02500 pfn=0x12b6e4 order=0 migratetype=1 gfp_flags=GFP_HIGHUSER_MOVABLE|__GFP_COMP
<...>-1242 [005] d..2. 4429.120680: tlb_flush: pages:1 reason:local MM shootdown (3)
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/ring_buffer.c | 57 +++++++++++++++++++++++---------------
1 file changed, 35 insertions(+), 22 deletions(-)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9cdbee171cdc..f42d2176b92c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5794,6 +5794,7 @@ __rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
struct buffer_page *reader = NULL;
unsigned long overwrite;
unsigned long flags;
+ int missed_events = 0;
int nr_loops = 0;
bool ret;
@@ -5894,6 +5895,9 @@ __rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
if (!ret)
goto spin;
+ if (rb_page_commit(reader) & RB_MISSED_EVENTS)
+ missed_events = -1;
+
if (cpu_buffer->ring_meta)
rb_update_meta_reader(cpu_buffer, reader);
@@ -5958,6 +5962,8 @@ __rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
*/
smp_rmb();
+ if (!cpu_buffer->lost_events)
+ cpu_buffer->lost_events = missed_events;
return reader;
}
@@ -7059,6 +7065,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
struct buffer_page *reader;
long missed_events;
unsigned int commit;
+ unsigned int size;
unsigned int read;
u64 save_timestamp;
bool force_memcpy;
@@ -7094,7 +7101,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
event = rb_reader_event(cpu_buffer);
read = reader->read;
- commit = rb_page_size(reader);
+ commit = rb_page_commit(reader);
+ size = rb_page_size(reader);
/* Check if any events were dropped */
missed_events = cpu_buffer->lost_events;
@@ -7108,13 +7116,14 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
* we must copy the data from the page to the buffer.
* Otherwise, we can simply swap the page with the one passed in.
*/
- if (read || (len < (commit - read)) ||
+ if (read || (len < (size - read)) ||
cpu_buffer->reader_page == cpu_buffer->commit_page ||
force_memcpy) {
struct buffer_data_page *rpage = cpu_buffer->reader_page->page;
unsigned int rpos = read;
unsigned int pos = 0;
- unsigned int size;
+ unsigned int event_size;
+ unsigned int flags = 0;
/*
* If a full page is expected, this can still be returned
@@ -7123,19 +7132,23 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
* the reader page.
*/
if (full &&
- (!read || (len < (commit - read)) ||
+ (!read || (len < (size - read)) ||
cpu_buffer->reader_page == cpu_buffer->commit_page))
return -1;
- if (len > (commit - read))
- len = (commit - read);
+ if (len > (size - read))
+ len = (size - read);
/* Always keep the time extend and data together */
- size = rb_event_ts_length(event);
+ event_size = rb_event_ts_length(event);
- if (len < size)
+ if (len < event_size)
return -1;
+ if (commit & RB_MISSED_EVENTS) {
+ printk("MISSED\n");
+ flags = RB_MISSED_EVENTS; }
+
/* save the current timestamp, since the user will need it */
save_timestamp = cpu_buffer->read_stamp;
@@ -7147,25 +7160,25 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
* one or two events.
* We have already ensured there's enough space if this
* is a time extend. */
- size = rb_event_length(event);
- memcpy(dpage->data + pos, rpage->data + rpos, size);
+ event_size = rb_event_length(event);
+ memcpy(dpage->data + pos, rpage->data + rpos, event_size);
- len -= size;
+ len -= event_size;
rb_advance_reader(cpu_buffer);
rpos = reader->read;
- pos += size;
+ pos += event_size;
- if (rpos >= commit)
+ if (rpos >= event_size)
break;
event = rb_reader_event(cpu_buffer);
/* Always keep the time extend and data together */
- size = rb_event_ts_length(event);
- } while (len >= size);
+ event_size = rb_event_ts_length(event);
+ } while (len >= event_size);
/* update dpage */
- local_set(&dpage->commit, pos);
+ local_set(&dpage->commit, pos | flags);
dpage->time_stamp = save_timestamp;
/* we copied everything to the beginning */
@@ -7197,7 +7210,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
cpu_buffer->lost_events = 0;
- commit = rb_data_page_commit(dpage);
+ size = rb_data_page_size(dpage);
/*
* Set a flag in the commit field if we lost events
*/
@@ -7207,11 +7220,11 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
* missed events, then record it there.
*/
if (missed_events > 0 &&
- buffer->subbuf_size - commit >= sizeof(missed_events)) {
- memcpy(&dpage->data[commit], &missed_events,
+ buffer->subbuf_size - size >= sizeof(missed_events)) {
+ memcpy(&dpage->data[size], &missed_events,
sizeof(missed_events));
local_add(RB_MISSED_STORED, &dpage->commit);
- commit += sizeof(missed_events);
+ size += sizeof(missed_events);
}
local_add(RB_MISSED_EVENTS, &dpage->commit);
}
@@ -7219,8 +7232,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
/*
* This page may be off to user land. Zero it out here.
*/
- if (commit < buffer->subbuf_size)
- memset(&dpage->data[commit], 0, buffer->subbuf_size - commit);
+ if (size < buffer->subbuf_size)
+ memset(&dpage->data[size], 0, buffer->subbuf_size - size);
return read;
}
--
2.53.0
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v20 10/10] ring-buffer: Show persistent buffer dropped events in trace_pipe file
2026-05-20 18:49 ` [PATCH v20 10/10] ring-buffer: Show persistent buffer dropped events in trace_pipe file Steven Rostedt
@ 2026-05-21 8:18 ` Masami Hiramatsu
2026-05-21 12:17 ` Steven Rostedt
0 siblings, 1 reply; 22+ messages in thread
From: Masami Hiramatsu @ 2026-05-21 8:18 UTC (permalink / raw)
To: Steven Rostedt
Cc: linux-kernel, linux-trace-kernel, Masami Hiramatsu,
Mathieu Desnoyers, Catalin Marinas, Will Deacon, Ian Rogers
On Wed, 20 May 2026 14:49:48 -0400
Steven Rostedt <rostedt@kernel.org> wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
>
> When the persistent ring buffer is validated on boot up, if a subbuffer is
> deemed invalid, it resets the buffer and continues. Have the code preserve
> the RB_MISSED_EVENTS flag in the commit portion of the subbuffer header
> and pass that back so that the trace_pipe file can show the missed events
> like the trace file does.
>
> For example:
>
> <...>-1242 [005] d.... 4429.120116: page_fault_user: address=0x7ffaebb6e728 ip=0x7ffaeb9d4960 error_code=0x7
> <...>-1242 [005] ..... 4429.120124: mm_page_alloc: page=00000000055254f3 pfn=0x1373bd order=0 migratetype=1 gfp_flags=GFP_HIGHUSER_MOVABLE|__GFP_COMP
> <...>-1242 [005] d..2. 4429.120132: tlb_flush: pages:1 reason:local MM shootdown (3)
> CPU:5 [LOST EVENTS]
> <...>-1242 [005] d.... 4429.120661: page_fault_user: address=0x55ba7c2d0944 ip=0x55ba7c20cd02 error_code=0x7
> <...>-1242 [005] ..... 4429.120669: mm_page_alloc: page=0000000005a02500 pfn=0x12b6e4 order=0 migratetype=1 gfp_flags=GFP_HIGHUSER_MOVABLE|__GFP_COMP
> <...>-1242 [005] d..2. 4429.120680: tlb_flush: pages:1 reason:local MM shootdown (3)
OK, this looks good, but I have a comment below.
[...]
> @@ -7123,19 +7132,23 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
> * the reader page.
> */
> if (full &&
> - (!read || (len < (commit - read)) ||
> + (!read || (len < (size - read)) ||
> cpu_buffer->reader_page == cpu_buffer->commit_page))
> return -1;
>
> - if (len > (commit - read))
> - len = (commit - read);
> + if (len > (size - read))
> + len = (size - read);
>
> /* Always keep the time extend and data together */
> - size = rb_event_ts_length(event);
> + event_size = rb_event_ts_length(event);
>
> - if (len < size)
> + if (len < event_size)
> return -1;
>
> + if (commit & RB_MISSED_EVENTS) {
> + printk("MISSED\n");
Is it for debug?
> + flags = RB_MISSED_EVENTS; }
nit: block closing brace is in the previous line. Maybe typo?
Thanks,
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v20 10/10] ring-buffer: Show persistent buffer dropped events in trace_pipe file
2026-05-21 8:18 ` Masami Hiramatsu
@ 2026-05-21 12:17 ` Steven Rostedt
0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-21 12:17 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: linux-kernel, linux-trace-kernel, Mathieu Desnoyers,
Catalin Marinas, Will Deacon, Ian Rogers
On Thu, 21 May 2026 17:18:59 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> >
> > + if (commit & RB_MISSED_EVENTS) {
> > + printk("MISSED\n");
>
> Is it for debug?
Yes, that was left over. I thought I got rid of all of them. :-p
>
> > + flags = RB_MISSED_EVENTS; }
>
> nit: block closing brace is in the previous line. Maybe typo?
When I do debug statements, I end block statements like this to let me know
that the if statement is supposed to be one line when I remove the print.
But since I missed removing this one, I kept the above formatting.
I'll remove this in v21.
Thanks,
-- Steve
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust
2026-05-20 18:49 [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Steven Rostedt
` (9 preceding siblings ...)
2026-05-20 18:49 ` [PATCH v20 10/10] ring-buffer: Show persistent buffer dropped events in trace_pipe file Steven Rostedt
@ 2026-05-21 8:13 ` Masami Hiramatsu
2026-05-21 12:17 ` Steven Rostedt
10 siblings, 1 reply; 22+ messages in thread
From: Masami Hiramatsu @ 2026-05-21 8:13 UTC (permalink / raw)
To: Steven Rostedt
Cc: linux-kernel, linux-trace-kernel, Masami Hiramatsu,
Mathieu Desnoyers, Catalin Marinas, Will Deacon, Ian Rogers
On Wed, 20 May 2026 14:49:38 -0400
Steven Rostedt <rostedt@kernel.org> wrote:
>
> Here is the 20th version of improvement patches for making persistent
> ring buffers robust to failures.
> The previous version is here:
>
> https://lore.kernel.org/all/177751968499.2136606.17388366710182662849.stgit@mhiramat.tok.corp.google.com/
>
> None of the patches from the 19th version was changed. Only patches were
> added to it in this version. All of Masami's patches were in version 19,
> and all my patches are new to version 20. The reason I'm including
> Masami's patches with mine is so that Sashiko can handle all of them
> in one go.
Thanks for updating.
BTW, it seems Sashiko is stopping review this series.
https://sashiko.dev/#/patchset/20260520184938.749337513%40kernel.org
Not sure why.
>
> I moved patch 1 from v19 to my urgent branch as it was marked as
> fix for stable.
>
> The patches I added:
>
> - Fix an invalid sub-buffer in the iterator (added TBD fixes tag)
> I didn't want to fold the patch into the patch that was fixed
> as it was written by Masami.
>
> - Have the dropped buffers be persistent across boots. Masami's patches
> cleared the detection of lost subbuffers on boot up and the next
> boot would not show them. If the persistent ring buffer isn't cleared
> it should report the same info on subsequent boots.
>
> - Show [LOST EVENTS] in persistent trace file where a subbuffer was
> reset due to being invalid.
>
> - Show [LOST EVENTS] in the persistent trace_pipe file as well.
>
> Masami Hiramatsu (Google) (6):
> ring-buffer: Skip invalid sub-buffers when validating persistent ring buffer
> ring-buffer: Skip invalid sub-buffers when rewinding persistent ring buffer
> ring-buffer: Add persistent ring buffer invalid-page inject test
> ring-buffer: Show commit numbers in buffer_meta file
> ring-buffer: Cleanup persistent ring buffer validation
> ring-buffer: Cleanup buffer_data_page related code
>
> Steven Rostedt (4):
> ring-buffer: Skip invalid sub-buffers for iterator
> ring-buffer: Have dropped subbuffers be persistent across reboots
> ring-buffer: Show persistent buffer dropped events in trace file
> ring-buffer: Show persistent buffer dropped events in trace_pipe file
>
> ----
> include/linux/ring_buffer.h | 1 +
> kernel/trace/Kconfig | 34 +++
> kernel/trace/ring_buffer.c | 538 +++++++++++++++++++++++++++++---------------
> kernel/trace/trace.c | 4 +
> 4 files changed, 397 insertions(+), 180 deletions(-)
--
Masami Hiramatsu (Google) <mhiramat@kernel.org>
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust
2026-05-21 8:13 ` [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust Masami Hiramatsu
@ 2026-05-21 12:17 ` Steven Rostedt
2026-05-21 12:24 ` Steven Rostedt
0 siblings, 1 reply; 22+ messages in thread
From: Steven Rostedt @ 2026-05-21 12:17 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: linux-kernel, linux-trace-kernel, Mathieu Desnoyers,
Catalin Marinas, Will Deacon, Ian Rogers
On Thu, 21 May 2026 17:13:53 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> Thanks for updating.
>
> BTW, it seems Sashiko is stopping review this series.
> https://sashiko.dev/#/patchset/20260520184938.749337513%40kernel.org
>
> Not sure why.
Yeah, I noticed that too. And it never did your v19 either.
-- Steve
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v20 00/10] ring-buffer: Making persistent ring buffers robust
2026-05-21 12:17 ` Steven Rostedt
@ 2026-05-21 12:24 ` Steven Rostedt
0 siblings, 0 replies; 22+ messages in thread
From: Steven Rostedt @ 2026-05-21 12:24 UTC (permalink / raw)
To: Masami Hiramatsu (Google)
Cc: linux-kernel, linux-trace-kernel, Mathieu Desnoyers,
Catalin Marinas, Will Deacon, Ian Rogers
On Thu, 21 May 2026 08:17:40 -0400
Steven Rostedt <rostedt@kernel.org> wrote:
> On Thu, 21 May 2026 17:13:53 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
>
> > Thanks for updating.
> >
> > BTW, it seems Sashiko is stopping review this series.
> > https://sashiko.dev/#/patchset/20260520184938.749337513%40kernel.org
> >
> > Not sure why.
>
> Yeah, I noticed that too. And it never did your v19 either.
Looking now, it did finish v19. It wasn't finished yesterday. Maybe it just
took a lot longer for this series.
-- Steve
^ permalink raw reply [flat|nested] 22+ messages in thread