From: Steven Rostedt <rostedt@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Andrew Morton <akpm@linux-foundation.org>,
Vincent Donnefort <vdonnefort@google.com>
Subject: [for-next][PATCH 07/18] tracing: Add non-consuming read to trace remotes
Date: Tue, 10 Mar 2026 10:35:22 -0400 [thread overview]
Message-ID: <20260310143603.689651155@kernel.org> (raw)
In-Reply-To: 20260310143515.132579088@kernel.org
From: Vincent Donnefort <vdonnefort@google.com>
Allow reading the trace file for trace remotes. This performs a
non-consuming read of the trace buffer.
Link: https://patch.msgid.link/20260309162516.2623589-8-vdonnefort@google.com
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
kernel/trace/trace.c | 2 +-
kernel/trace/trace.h | 1 +
kernel/trace/trace_remote.c | 339 ++++++++++++++++++++++++++++++++++--
3 files changed, 326 insertions(+), 16 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e33cb3c39f6e..5e1129b011cb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3856,7 +3856,7 @@ static int s_show(struct seq_file *m, void *v)
* Should be used after trace_array_get(), trace_types_lock
* ensures that i_cdev was already initialized.
*/
-static inline int tracing_get_cpu(struct inode *inode)
+int tracing_get_cpu(struct inode *inode)
{
if (inode->i_cdev) /* See trace_create_cpu_file() */
return (long)inode->i_cdev - 1;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d6af4405b341..fbbd0b2ee76f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -695,6 +695,7 @@ struct dentry *trace_create_cpu_file(const char *name,
void *data,
long cpu,
const struct file_operations *fops);
+int tracing_get_cpu(struct inode *inode);
/**
diff --git a/kernel/trace/trace_remote.c b/kernel/trace/trace_remote.c
index a7b94736dd38..039ba71c3b3e 100644
--- a/kernel/trace/trace_remote.c
+++ b/kernel/trace/trace_remote.c
@@ -18,14 +18,23 @@
#define TRACEFS_MODE_WRITE 0640
#define TRACEFS_MODE_READ 0440
+enum tri_type {
+ TRI_CONSUMING,
+ TRI_NONCONSUMING,
+};
+
struct trace_remote_iterator {
struct trace_remote *remote;
struct trace_seq seq;
struct delayed_work poll_work;
unsigned long lost_events;
u64 ts;
+ struct ring_buffer_iter *rb_iter;
+ struct ring_buffer_iter **rb_iters;
int cpu;
int evt_cpu;
+ loff_t pos;
+ enum tri_type type;
};
struct trace_remote {
@@ -36,6 +45,8 @@ struct trace_remote {
unsigned long trace_buffer_size;
struct ring_buffer_remote rb_remote;
struct mutex lock;
+ struct rw_semaphore reader_lock;
+ struct rw_semaphore *pcpu_reader_locks;
unsigned int nr_readers;
unsigned int poll_ms;
bool tracing_on;
@@ -230,6 +241,20 @@ static int trace_remote_get(struct trace_remote *remote, int cpu)
if (ret)
return ret;
+ if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) {
+ int lock_cpu;
+
+ remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks),
+ GFP_KERNEL);
+ if (!remote->pcpu_reader_locks) {
+ trace_remote_try_unload(remote);
+ return -ENOMEM;
+ }
+
+ for_each_possible_cpu(lock_cpu)
+ init_rwsem(&remote->pcpu_reader_locks[lock_cpu]);
+ }
+
remote->nr_readers++;
return 0;
@@ -244,6 +269,9 @@ static void trace_remote_put(struct trace_remote *remote)
if (remote->nr_readers)
return;
+ kfree(remote->pcpu_reader_locks);
+ remote->pcpu_reader_locks = NULL;
+
trace_remote_try_unload(remote);
}
@@ -258,13 +286,55 @@ static void __poll_remote(struct work_struct *work)
msecs_to_jiffies(iter->remote->poll_ms));
}
-static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remote, int cpu)
+static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
+{
+ if (cpu != RING_BUFFER_ALL_CPUS) {
+ ring_buffer_read_finish(iter->rb_iter);
+ return;
+ }
+
+ for_each_possible_cpu(cpu) {
+ if (iter->rb_iters[cpu])
+ ring_buffer_read_finish(iter->rb_iters[cpu]);
+ }
+
+ kfree(iter->rb_iters);
+}
+
+static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu)
+{
+ if (cpu != RING_BUFFER_ALL_CPUS) {
+ iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL);
+
+ return iter->rb_iter ? 0 : -ENOMEM;
+ }
+
+ iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL);
+ if (!iter->rb_iters)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu,
+ GFP_KERNEL);
+ if (!iter->rb_iters[cpu]) {
+ __free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static struct trace_remote_iterator
+*trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type)
{
struct trace_remote_iterator *iter = NULL;
int ret;
lockdep_assert_held(&remote->lock);
+ if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote))
+ return NULL;
ret = trace_remote_get(remote, cpu);
if (ret)
@@ -279,9 +349,21 @@ static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remo
if (iter) {
iter->remote = remote;
iter->cpu = cpu;
+ iter->type = type;
trace_seq_init(&iter->seq);
- INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
- schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
+
+ switch (type) {
+ case TRI_CONSUMING:
+ INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
+ schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));
+ break;
+ case TRI_NONCONSUMING:
+ ret = __alloc_ring_buffer_iter(iter, cpu);
+ break;
+ }
+
+ if (ret)
+ goto err;
return iter;
}
@@ -305,10 +387,100 @@ static void trace_remote_iter_free(struct trace_remote_iterator *iter)
lockdep_assert_held(&remote->lock);
+ switch (iter->type) {
+ case TRI_CONSUMING:
+ cancel_delayed_work_sync(&iter->poll_work);
+ break;
+ case TRI_NONCONSUMING:
+ __free_ring_buffer_iter(iter, iter->cpu);
+ break;
+ }
+
kfree(iter);
trace_remote_put(remote);
}
+static void trace_remote_iter_read_start(struct trace_remote_iterator *iter)
+{
+ struct trace_remote *remote = iter->remote;
+ int cpu = iter->cpu;
+
+ /*
+ * Acquire global reader lock: in write mode for a consuming reader of
+ * all CPUs, in read mode for every other reader (any single-CPU reader
+ * and any non-consuming reader).
+ */
+ if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
+ down_write(&remote->reader_lock);
+ else
+ down_read(&remote->reader_lock);
+
+ if (cpu == RING_BUFFER_ALL_CPUS)
+ return;
+
+ /*
+ * No need for the remote lock here, iter holds a reference on
+ * remote->nr_readers
+ */
+
+ /*
+ * Get the per-CPU one: write mode for a consuming reader, read mode for
+ * a non-consuming one. If the per-CPU lock array is unexpectedly
+ * missing, warn once and return holding only the global lock.
+ */
+ if (WARN_ON_ONCE(!remote->pcpu_reader_locks))
+ return;
+
+ if (iter->type == TRI_CONSUMING)
+ down_write(&remote->pcpu_reader_locks[cpu]);
+ else
+ down_read(&remote->pcpu_reader_locks[cpu]);
+}
+
+/*
+ * Release the reader locks taken by trace_remote_iter_read_start(), in the
+ * reverse order: the per-CPU lock first (single-CPU iterators only), then the
+ * global one. Each lock is released in the mode it was acquired in: write mode
+ * for a consuming reader of the matching scope, read mode otherwise.
+ */
+static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter)
+{
+	struct trace_remote *remote = iter->remote;
+	int cpu = iter->cpu;
+
+	/*
+	 * Release per-CPU reader lock.
+	 *
+	 * trace_remote_iter_read_start() warns and returns without taking it
+	 * when pcpu_reader_locks is missing, so in that case there is nothing
+	 * to release and the array must not be dereferenced.
+	 */
+	if (cpu != RING_BUFFER_ALL_CPUS && remote->pcpu_reader_locks) {
+		/*
+		 * No need for the remote lock here, iter holds a reference on
+		 * remote->nr_readers
+		 */
+		if (iter->type == TRI_CONSUMING)
+			up_write(&remote->pcpu_reader_locks[cpu]);
+		else
+			up_read(&remote->pcpu_reader_locks[cpu]);
+	}
+
+	/* Release global reader lock */
+	if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING)
+		up_write(&remote->reader_lock);
+	else
+		up_read(&remote->reader_lock);
+}
+
+static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu)
+{
+ return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu];
+}
+
+static struct ring_buffer_event *
+__peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events)
+{
+ struct ring_buffer_event *rb_evt;
+ struct ring_buffer_iter *rb_iter;
+
+ switch (iter->type) {
+ case TRI_CONSUMING:
+ return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events);
+ case TRI_NONCONSUMING:
+ rb_iter = __get_rb_iter(iter, cpu);
+ rb_evt = ring_buffer_iter_peek(rb_iter, ts);
+ if (!rb_evt)
+ return NULL;
+
+ *lost_events = ring_buffer_iter_dropped(rb_iter);
+
+ return rb_evt;
+ }
+
+ return NULL;
+}
+
static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
{
struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
@@ -318,7 +490,7 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
if (ring_buffer_empty_cpu(trace_buffer, cpu))
return false;
- if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, &iter->lost_events))
+ if (!__peek_event(iter, cpu, &iter->ts, &iter->lost_events))
return false;
iter->evt_cpu = cpu;
@@ -333,7 +505,7 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
if (ring_buffer_empty_cpu(trace_buffer, cpu))
continue;
- if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events))
+ if (!__peek_event(iter, cpu, &ts, &lost_events))
continue;
if (ts >= iter->ts)
@@ -347,6 +519,20 @@ static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
return iter->ts != U64_MAX;
}
+static void trace_remote_iter_move(struct trace_remote_iterator *iter)
+{
+ struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
+
+ switch (iter->type) {
+ case TRI_CONSUMING:
+ ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
+ break;
+ case TRI_NONCONSUMING:
+ ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu));
+ break;
+ }
+}
+
static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
{
unsigned long usecs_rem;
@@ -369,13 +555,14 @@ static int trace_pipe_open(struct inode *inode, struct file *filp)
{
struct trace_remote *remote = inode->i_private;
struct trace_remote_iterator *iter;
- int cpu = RING_BUFFER_ALL_CPUS;
-
- if (inode->i_cdev)
- cpu = (long)inode->i_cdev - 1;
+ int cpu = tracing_get_cpu(inode);
guard(mutex)(&remote->lock);
- iter = trace_remote_iter(remote, cpu);
+
+ iter = trace_remote_iter(remote, cpu, TRI_CONSUMING);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
filp->private_data = iter;
return IS_ERR(iter) ? PTR_ERR(iter) : 0;
@@ -410,6 +597,8 @@ static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt,
if (ret < 0)
return ret;
+ trace_remote_iter_read_start(iter);
+
while (trace_remote_iter_read_event(iter)) {
int prev_len = iter->seq.seq.len;
@@ -418,9 +607,11 @@ static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt,
break;
}
- ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
+ trace_remote_iter_move(iter);
}
+ trace_remote_iter_read_finished(iter);
+
goto copy_to_user;
}
@@ -430,14 +621,127 @@ static const struct file_operations trace_pipe_fops = {
.release = trace_pipe_release,
};
+static void *trace_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct trace_remote_iterator *iter = m->private;
+
+ ++*pos;
+
+ if (!iter || !trace_remote_iter_read_event(iter))
+ return NULL;
+
+ trace_remote_iter_move(iter);
+ iter->pos++;
+
+ return iter;
+}
+
+/*
+ * seq_file ->start() callback for the non-consuming "trace" file.
+ *
+ * Takes the reader locks (released in trace_stop()) and moves the iterator to
+ * the event at position *pos:
+ *  - *pos == 0: iter->pos is reset to -1 and the first event is fetched;
+ *  - otherwise: events are stepped over from iter->pos until *pos is reached.
+ *
+ * Returns the iterator, or NULL when the file has no iterator attached
+ * (m->private is NULL) or when there is no more event to read.
+ *
+ * NOTE(review): only iter->pos is reset when restarting from position 0, the
+ * ring-buffer iterators are not rewound here -- confirm this is intended.
+ */
+static void *trace_start(struct seq_file *m, loff_t *pos)
+{
+	struct trace_remote_iterator *iter = m->private;
+	/* trace_next() increments this in place, it must start from a known value */
+	loff_t i = 0;
+
+	if (!iter)
+		return NULL;
+
+	trace_remote_iter_read_start(iter);
+
+	if (!*pos) {
+		iter->pos = -1;
+		return trace_next(m, NULL, &i);
+	}
+
+	i = iter->pos;
+	while (i < *pos) {
+		iter = trace_next(m, NULL, &i);
+		if (!iter)
+			return NULL;
+	}
+
+	return iter;
+}
+
+static int trace_show(struct seq_file *m, void *v)
+{
+ struct trace_remote_iterator *iter = v;
+
+ trace_seq_init(&iter->seq);
+
+ if (trace_remote_iter_print_event(iter)) {
+ seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id);
+ return 0;
+ }
+
+ return trace_print_seq(m, &iter->seq);
+}
+
+static void trace_stop(struct seq_file *m, void *v)
+{
+ struct trace_remote_iterator *iter = m->private;
+
+ if (iter)
+ trace_remote_iter_read_finished(iter);
+}
+
+static const struct seq_operations trace_sops = {
+ .start = trace_start,
+ .next = trace_next,
+ .show = trace_show,
+ .stop = trace_stop,
+};
+
+static int trace_open(struct inode *inode, struct file *filp)
+{
+ struct trace_remote *remote = inode->i_private;
+ struct trace_remote_iterator *iter = NULL;
+ int cpu = tracing_get_cpu(inode);
+ int ret;
+
+ if (!(filp->f_mode & FMODE_READ))
+ return 0;
+
+ guard(mutex)(&remote->lock);
+
+ iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ ret = seq_open(filp, &trace_sops);
+ if (ret) {
+ trace_remote_iter_free(iter);
+ return ret;
+ }
+
+ ((struct seq_file *)filp->private_data)->private = (void *)iter;
+
+ return 0;
+}
+
+static int trace_release(struct inode *inode, struct file *filp)
+{
+ struct trace_remote_iterator *iter;
+
+ if (!(filp->f_mode & FMODE_READ))
+ return 0;
+
+ iter = ((struct seq_file *)filp->private_data)->private;
+ seq_release(inode, filp);
+
+ if (!iter)
+ return 0;
+
+ guard(mutex)(&iter->remote->lock);
+
+ trace_remote_iter_free(iter);
+
+ return 0;
+}
+
static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
struct inode *inode = file_inode(filp);
struct trace_remote *remote = inode->i_private;
- int cpu = RING_BUFFER_ALL_CPUS;
-
- if (inode->i_cdev)
- cpu = (long)inode->i_cdev - 1;
+ int cpu = tracing_get_cpu(inode);
guard(mutex)(&remote->lock);
@@ -447,7 +751,11 @@ static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cn
}
static const struct file_operations trace_fops = {
+ .open = trace_open,
.write = trace_write,
+ .read = seq_read,
+ .read_iter = seq_read_iter,
+ .release = trace_release,
};
static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
@@ -565,6 +873,7 @@ int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs,
remote->trace_buffer_size = 7 << 10;
remote->poll_ms = 100;
mutex_init(&remote->lock);
+ init_rwsem(&remote->reader_lock);
if (trace_remote_init_tracefs(name, remote)) {
kfree(remote);
--
2.51.0
next prev parent reply other threads:[~2026-03-10 14:35 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-10 14:35 [for-next][PATCH 00/18] ring-buffer: Add remote buffer infrastructure Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 01/18] ring-buffer: Add page statistics to the meta-page Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 02/18] ring-buffer: Store bpage pointers into subbuf_ids Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 03/18] ring-buffer: Introduce ring-buffer remotes Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 04/18] ring-buffer: Add non-consuming read for " Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 05/18] tracing: Introduce trace remotes Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 06/18] tracing: Add reset to " Steven Rostedt
2026-03-10 14:35 ` Steven Rostedt [this message]
2026-03-10 14:35 ` [for-next][PATCH 08/18] tracing: Add init callback " Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 09/18] tracing: Add events " Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 10/18] tracing: Add events/ root files " Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 11/18] tracing: Add helpers to create trace remote events Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 12/18] ring-buffer: Export buffer_data_page and macros Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 13/18] tracing: Introduce simple_ring_buffer Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 14/18] tracing: Add a trace remote module for testing Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 15/18] tracing: selftests: Add trace remote tests Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 16/18] Documentation: tracing: Add tracing remotes Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 17/18] tracing: load/unload page callbacks for simple_ring_buffer Steven Rostedt
2026-03-10 14:35 ` [for-next][PATCH 18/18] tracing: Check for undefined symbols in simple_ring_buffer Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260310143603.689651155@kernel.org \
--to=rostedt@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mathieu.desnoyers@efficios.com \
--cc=mhiramat@kernel.org \
--cc=vdonnefort@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.