From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Namhyung Kim <namhyung@kernel.org>,
Masami Hiramatsu <mhiramat@kernel.org>,
Tom Zanussi <zanussi@kernel.org>,
Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Subject: [for-next][PATCH 16/30] ring-buffer: Add percentage of ring buffer full to wake up reader
Date: Wed, 05 Dec 2018 18:48:05 -0500 [thread overview]
Message-ID: <20181205234830.758310141@goodmis.org> (raw)
In-Reply-To: 20181205234749.372720574@goodmis.org
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Instead of just waiting for a page to be full before waking up a pending
reader, allow the reader to pass in a "percentage" of pages that have
content before waking up a reader. This should help keep the process of
reading the events not cause wake ups that constantly cause reading of the
buffer.
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
include/linux/ring_buffer.h | 4 ++-
kernel/trace/ring_buffer.c | 71 ++++++++++++++++++++++++++++++++++---
kernel/trace/trace.c | 8 ++---
3 files changed, 73 insertions(+), 10 deletions(-)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 0940fda59872..5b9ae62272bb 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
__ring_buffer_alloc((size), (flags), &__key); \
})
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full);
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full);
__poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table);
@@ -189,6 +189,8 @@ bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer);
size_t ring_buffer_page_len(void *page);
+size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu);
+size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu);
void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);
void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 65bd4616220d..9edb628603ab 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -487,6 +487,9 @@ struct ring_buffer_per_cpu {
local_t dropped_events;
local_t committing;
local_t commits;
+ local_t pages_touched;
+ local_t pages_read;
+ size_t shortest_full;
unsigned long read;
unsigned long read_bytes;
u64 write_stamp;
@@ -529,6 +532,41 @@ struct ring_buffer_iter {
u64 read_stamp;
};
+/**
+ * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
+ * @buffer: The ring_buffer to get the number of pages from
+ * @cpu: The cpu of the ring_buffer to get the number of pages from
+ *
+ * Returns the number of pages used by a per_cpu buffer of the ring buffer.
+ */
+size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu)
+{
+ return buffer->buffers[cpu]->nr_pages;
+}
+
+/**
+ * ring_buffer_nr_pages_dirty - get the number of used pages in the ring buffer
+ * @buffer: The ring_buffer to get the number of pages from
+ * @cpu: The cpu of the ring_buffer to get the number of pages from
+ *
+ * Returns the number of pages that have content in the ring buffer.
+ */
+size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu)
+{
+ size_t read;
+ size_t cnt;
+
+ read = local_read(&buffer->buffers[cpu]->pages_read);
+ cnt = local_read(&buffer->buffers[cpu]->pages_touched);
+ /* The reader can read an empty page, but not more than that */
+ if (cnt < read) {
+ WARN_ON_ONCE(read > cnt + 1);
+ return 0;
+ }
+
+ return cnt - read;
+}
+
/*
* rb_wake_up_waiters - wake up tasks waiting for ring buffer input
*
@@ -556,7 +594,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*/
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full)
{
struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
DEFINE_WAIT(wait);
@@ -571,7 +609,7 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
if (cpu == RING_BUFFER_ALL_CPUS) {
work = &buffer->irq_work;
/* Full only makes sense on per cpu reads */
- full = false;
+ full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -ENODEV;
@@ -623,15 +661,22 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
!ring_buffer_empty_cpu(buffer, cpu)) {
unsigned long flags;
bool pagebusy;
+ size_t nr_pages;
+ size_t dirty;
if (!full)
break;
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+ nr_pages = cpu_buffer->nr_pages;
+ dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+ if (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full < full)
+ cpu_buffer->shortest_full = full;
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
- if (!pagebusy)
+ if (!pagebusy &&
+ (!nr_pages || (dirty * 100) > full * nr_pages))
break;
}
@@ -1054,6 +1099,7 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
+ local_inc(&cpu_buffer->pages_touched);
/*
* Just make sure we have seen our old_write and synchronize
* with any interrupts that come in.
@@ -2603,6 +2649,16 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
+ size_t nr_pages;
+ size_t dirty;
+ size_t full;
+
+ full = cpu_buffer->shortest_full;
+ nr_pages = cpu_buffer->nr_pages;
+ dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
+ if (full && nr_pages && (dirty * 100) <= full * nr_pages)
+ return;
+
cpu_buffer->irq_work.wakeup_full = true;
cpu_buffer->irq_work.full_waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
@@ -3732,13 +3788,15 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
goto spin;
/*
- * Yeah! We succeeded in replacing the page.
+ * Yay! We succeeded in replacing the page.
*
* Now make the new head point back to the reader page.
*/
rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+ local_inc(&cpu_buffer->pages_read);
+
/* Finally update the reader page to the new head */
cpu_buffer->reader_page = reader;
cpu_buffer->reader_page->read = 0;
@@ -4334,6 +4392,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
local_set(&cpu_buffer->entries, 0);
local_set(&cpu_buffer->committing, 0);
local_set(&cpu_buffer->commits, 0);
+ local_set(&cpu_buffer->pages_touched, 0);
+ local_set(&cpu_buffer->pages_read, 0);
+ cpu_buffer->shortest_full = 0;
cpu_buffer->read = 0;
cpu_buffer->read_bytes = 0;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ff1c4b20cd0a..48d5eb22ff33 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1431,7 +1431,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
}
#endif /* CONFIG_TRACER_MAX_TRACE */
-static int wait_on_pipe(struct trace_iterator *iter, bool full)
+static int wait_on_pipe(struct trace_iterator *iter, int full)
{
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
@@ -5693,7 +5693,7 @@ static int tracing_wait_pipe(struct file *filp)
mutex_unlock(&iter->mutex);
- ret = wait_on_pipe(iter, false);
+ ret = wait_on_pipe(iter, 0);
mutex_lock(&iter->mutex);
@@ -6751,7 +6751,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if ((filp->f_flags & O_NONBLOCK))
return -EAGAIN;
- ret = wait_on_pipe(iter, false);
+ ret = wait_on_pipe(iter, 0);
if (ret)
return ret;
@@ -6948,7 +6948,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
goto out;
- ret = wait_on_pipe(iter, true);
+ ret = wait_on_pipe(iter, 1);
if (ret)
goto out;
--
2.19.1
next prev parent reply other threads:[~2018-12-05 23:49 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-12-05 23:47 [for-next][PATCH 00/30] tracing: Updates for the next merge window Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 01/30] function_graph: Remove unused task_curr_ret_stack() Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 02/30] tracing: Do not line wrap short line in function_graph_enter() Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 03/30] fgraph: Create a fgraph.c file to store function graph infrastructure Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 04/30] fgraph: Have set_graph_notrace only affect function_graph tracer Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 05/30] arm64: function_graph: Remove use of FTRACE_NOTRACE_DEPTH Steven Rostedt
2018-12-06 3:55 ` Steven Rostedt
2018-12-06 15:49 ` Will Deacon
2018-12-06 15:55 ` Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 06/30] function_graph: Remove the " Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 07/30] ftrace: Create new ftrace_internal.h header Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 08/30] function_graph: Do not expose the graph_time option when profiler is not configured Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 09/30] fgraph: Move function graph specific code into fgraph.c Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 10/30] tracing: Rearrange functions in trace_sched_wakeup.c Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 11/30] fgraph: Add new fgraph_ops structure to enable function graph hooks Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 12/30] function_graph: Move ftrace_graph_ret_addr() to fgraph.c Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 13/30] function_graph: Have profiler use new helper ftrace_graph_get_ret_stack() Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 14/30] tracing: Have trace_stack nr_entries compare not be so subtle Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 15/30] scripts/recordmcount.{c,pl}: support -ffunction-sections .text.* section names Steven Rostedt
2018-12-05 23:48 ` Steven Rostedt [this message]
2018-12-05 23:48 ` [for-next][PATCH 17/30] tracing: Add tracefs file buffer_percentage Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 18/30] tracing: Change default buffer_percent to 50 Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 19/30] tracing/uprobes: Add busy check when cleanup all uprobes Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 20/30] tracing: Lock event_mutex before synth_event_mutex Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 21/30] tracing: Simplify creation and deletion of synthetic events Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 22/30] tracing: Integrate similar probe argument parsers Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 23/30] tracing: Add unified dynamic event framework Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 24/30] tracing/kprobes: Use dyn_event framework for kprobe events Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 25/30] tracing/uprobes: Use dyn_event framework for uprobe events Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 26/30] tracing: Use dyn_event framework for synthetic events Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 27/30] tracing: Remove unneeded synth_event_mutex Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 28/30] tracing: Consolidate trace_add/remove_event_call back to the nolock functions Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 29/30] tracing: Add generic event-name based remove event method Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 30/30] selftests/ftrace: Add testcases for dynamic event Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181205234830.758310141@goodmis.org \
--to=rostedt@goodmis.org \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mhiramat@kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=ravi.bangoria@linux.vnet.ibm.com \
--cc=zanussi@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox