public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ring-buffer: Preserve true payload lengths in long data events
@ 2026-04-07  9:15 Cao Ruichuang
  2026-04-07 15:48 ` Steven Rostedt
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Cao Ruichuang @ 2026-04-07  9:15 UTC (permalink / raw)
  To: rostedt, mhiramat; +Cc: mathieu.desnoyers, linux-kernel, linux-trace-kernel

Long ring buffer data records currently store the aligned in-buffer size in
their length field. That makes ring_buffer_event_length() report padded
sizes, and small TRACE_PRINT / TRACE_RAW_DATA records lose their true
payload length entirely when they use the short type_len encoding.

Teach long data events to keep the true payload size in array[0], and let
the ring buffer derive the aligned in-buffer size separately when it needs
to walk or discard records. Then add a long-reserve helper and use it for
TRACE_PRINT and TRACE_RAW_DATA so their zero-length-array tails always
preserve the real payload size.

The temporary filtered-event buffer keeps the same long-record payload
length semantics, and a QEMU runtime reproducer for trace_marker_raw now
reports the expected byte counts again.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=210173
Signed-off-by: Cao Ruichuang <create0818@163.com>
---
 include/linux/ring_buffer.h |  2 ++
 kernel/trace/ring_buffer.c  | 56 ++++++++++++++++++++++++++-----------
 kernel/trace/trace.c        |  8 +++---
 kernel/trace/trace.h        | 15 ++++++++++
 kernel/trace/trace_printk.c |  8 +++---
 5 files changed, 65 insertions(+), 24 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d862fa610..a4e46cb53 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -137,6 +137,8 @@ void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val);
 
 struct ring_buffer_event *ring_buffer_lock_reserve(struct trace_buffer *buffer,
 						   unsigned long length);
+struct ring_buffer_event *ring_buffer_lock_reserve_long(struct trace_buffer *buffer,
+							unsigned long length);
 int ring_buffer_unlock_commit(struct trace_buffer *buffer);
 int ring_buffer_write(struct trace_buffer *buffer,
 		      unsigned long length, void *data);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 170170bd8..c9ade62df 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -206,10 +206,14 @@ rb_event_data_length(struct ring_buffer_event *event)
 	unsigned length;
 
 	if (event->type_len)
-		length = event->type_len * RB_ALIGNMENT;
-	else
-		length = event->array[0];
-	return length + RB_EVNT_HDR_SIZE;
+		return event->type_len * RB_ALIGNMENT + RB_EVNT_HDR_SIZE;
+
+	/*
+	 * Long records store the true payload size in array[0], but still
+	 * consume an aligned amount of space in the buffer.
+	 */
+	length = event->array[0] + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
+	return ALIGN(length, RB_ARCH_ALIGNMENT);
 }
 
 /*
@@ -276,12 +280,13 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 	if (extended_time(event))
 		event = skip_time_extend(event);
 
+	if (!event->type_len)
+		return event->array[0];
+
 	length = rb_event_length(event);
 	if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 		return length;
 	length -= RB_EVNT_HDR_SIZE;
-	if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
-                length -= sizeof(event->array[0]);
 	return length;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_event_length);
@@ -463,9 +468,11 @@ struct rb_event_info {
 	u64			delta;
 	u64			before;
 	u64			after;
+	unsigned long		data_length;
 	unsigned long		length;
 	struct buffer_page	*tail_page;
 	int			add_timestamp;
+	bool			force_long;
 };
 
 /*
@@ -3796,14 +3803,15 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
 
 	event->time_delta = delta;
 	length -= RB_EVNT_HDR_SIZE;
-	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT ||
+	    info->force_long) {
 		event->type_len = 0;
-		event->array[0] = length;
+		event->array[0] = info->data_length;
 	} else
 		event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
 }
 
-static unsigned rb_calculate_event_length(unsigned length)
+static unsigned int rb_calculate_event_length(unsigned int length, bool force_long)
 {
 	struct ring_buffer_event event; /* Used only for sizeof array */
 
@@ -3811,7 +3819,7 @@ static unsigned rb_calculate_event_length(unsigned length)
 	if (!length)
 		length++;
 
-	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
+	if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT || force_long)
 		length += sizeof(event.array[0]);
 
 	length += RB_EVNT_HDR_SIZE;
@@ -4605,7 +4613,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 static __always_inline struct ring_buffer_event *
 rb_reserve_next_event(struct trace_buffer *buffer,
 		      struct ring_buffer_per_cpu *cpu_buffer,
-		      unsigned long length)
+		      unsigned long length, bool force_long)
 {
 	struct ring_buffer_event *event;
 	struct rb_event_info info;
@@ -4641,7 +4649,9 @@ rb_reserve_next_event(struct trace_buffer *buffer,
 	}
 #endif
 
-	info.length = rb_calculate_event_length(length);
+	info.length = rb_calculate_event_length(length, force_long);
+	info.data_length = length ? : 1;
+	info.force_long = force_long;
 
 	if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) {
 		add_ts_default = RB_ADD_STAMP_ABSOLUTE;
@@ -4698,8 +4708,9 @@ rb_reserve_next_event(struct trace_buffer *buffer,
  * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
  * If NULL is returned, then nothing has been allocated or locked.
  */
-struct ring_buffer_event *
-ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
+static struct ring_buffer_event *
+__ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length,
+			   bool force_long)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event;
@@ -4727,7 +4738,7 @@ ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
 	if (unlikely(trace_recursive_lock(cpu_buffer)))
 		goto out;
 
-	event = rb_reserve_next_event(buffer, cpu_buffer, length);
+	event = rb_reserve_next_event(buffer, cpu_buffer, length, force_long);
 	if (!event)
 		goto out_unlock;
 
@@ -4739,8 +4750,21 @@ ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
 	preempt_enable_notrace();
 	return NULL;
 }
+
+struct ring_buffer_event *
+ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
+{
+	return __ring_buffer_lock_reserve(buffer, length, false);
+}
 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 
+struct ring_buffer_event *
+ring_buffer_lock_reserve_long(struct trace_buffer *buffer, unsigned long length)
+{
+	return __ring_buffer_lock_reserve(buffer, length, true);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve_long);
+
 /*
  * Decrement the entries to the page that an event is on.
  * The event does not even need to exist, only the pointer
@@ -4874,7 +4898,7 @@ int ring_buffer_write(struct trace_buffer *buffer,
 	if (unlikely(trace_recursive_lock(cpu_buffer)))
 		return -EBUSY;
 
-	event = rb_reserve_next_event(buffer, cpu_buffer, length);
+	event = rb_reserve_next_event(buffer, cpu_buffer, length, false);
 	if (!event)
 		goto out_unlock;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a626211ce..ffc1b1e9c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6503,8 +6503,8 @@ static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
 	size = cnt + meta_size;
 
 	buffer = tr->array_buffer.buffer;
-	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
-					    tracing_gen_ctx());
+	event = __trace_buffer_lock_reserve_long(buffer, TRACE_PRINT, size,
+						 tracing_gen_ctx());
 	if (unlikely(!event)) {
 		/*
 		 * If the size was greater than what was allowed, then
@@ -6917,8 +6917,8 @@ static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
 	if (size > ring_buffer_max_event_size(buffer))
 		return -EINVAL;
 
-	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
-					    tracing_gen_ctx());
+	event = __trace_buffer_lock_reserve_long(buffer, TRACE_RAW_DATA, size,
+						 tracing_gen_ctx());
 	if (!event)
 		/* Ring buffer disabled, return as if not open for write */
 		return -EBADF;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b8f380458..da55717c9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1613,6 +1613,21 @@ __trace_buffer_lock_reserve(struct trace_buffer *buffer,
 	return event;
 }
 
+static __always_inline struct ring_buffer_event *
+__trace_buffer_lock_reserve_long(struct trace_buffer *buffer,
+				 int type,
+				 unsigned long len,
+				 unsigned int trace_ctx)
+{
+	struct ring_buffer_event *event;
+
+	event = ring_buffer_lock_reserve_long(buffer, len);
+	if (event != NULL)
+		trace_event_setup(event, type, trace_ctx);
+
+	return event;
+}
+
 static __always_inline void
 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
 {
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 9f67ce42e..1441b2bd4 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -444,8 +444,8 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip,
 	trace_ctx = tracing_gen_ctx();
 	buffer = tr->array_buffer.buffer;
 	guard(ring_buffer_nest)(buffer);
-	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
-					    trace_ctx);
+	event = __trace_buffer_lock_reserve_long(buffer, TRACE_PRINT, alloc,
+						 trace_ctx);
 	if (!event)
 		return 0;
 
@@ -725,8 +725,8 @@ int __trace_array_vprintk(struct trace_buffer *buffer,
 
 	size = sizeof(*entry) + len + 1;
 	scoped_guard(ring_buffer_nest, buffer) {
-		event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
-						    trace_ctx);
+		event = __trace_buffer_lock_reserve_long(buffer, TRACE_PRINT, size,
+							 trace_ctx);
 		if (!event)
 			goto out;
 		entry = ring_buffer_event_data(event);
-- 
2.39.5 (Apple Git-154)


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-04-15 10:37 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-07  9:15 [PATCH] ring-buffer: Preserve true payload lengths in long data events Cao Ruichuang
2026-04-07 15:48 ` Steven Rostedt
2026-04-07 16:45   ` Cao Ruichuang
2026-04-15  9:29 ` kernel test robot
2026-04-15 10:36 ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox