public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Tom Zanussi <zanussi@kernel.org>,
	Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Subject: [for-next][PATCH 16/30] ring-buffer: Add percentage of ring buffer full to wake up reader
Date: Wed, 05 Dec 2018 18:48:05 -0500	[thread overview]
Message-ID: <20181205234830.758310141@goodmis.org> (raw)
In-Reply-To: 20181205234749.372720574@goodmis.org

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

Instead of just waiting for a page to be full before waking up a pending
reader, allow the reader to pass in a "percentage" of pages that must have
content before the reader is woken up. This should help keep the process
of reading the events from causing wake ups that constantly trigger
reading of the buffer.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  4 ++-
 kernel/trace/ring_buffer.c  | 71 ++++++++++++++++++++++++++++++++++---
 kernel/trace/trace.c        |  8 ++---
 3 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 0940fda59872..5b9ae62272bb 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full);
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full);
 __poll_t ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table);
 
@@ -189,6 +189,8 @@ bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer);
 
 size_t ring_buffer_page_len(void *page);
 
+size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu);
+size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu);
 
 void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);
 void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 65bd4616220d..9edb628603ab 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -487,6 +487,9 @@ struct ring_buffer_per_cpu {
 	local_t				dropped_events;
 	local_t				committing;
 	local_t				commits;
+	local_t				pages_touched;
+	local_t				pages_read;
+	size_t				shortest_full;
 	unsigned long			read;
 	unsigned long			read_bytes;
 	u64				write_stamp;
@@ -529,6 +532,41 @@ struct ring_buffer_iter {
 	u64				read_stamp;
 };
 
+/**
+ * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
+ * @buffer: The ring_buffer to get the number of pages from
+ * @cpu: The cpu of the ring_buffer to get the number of pages from
+ *
+ * Returns the number of pages used by a per_cpu buffer of the ring buffer.
+ */
+size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu)
+{
+	return buffer->buffers[cpu]->nr_pages;
+}
+
+/**
+ * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer
+ * @buffer: The ring_buffer to get the number of pages from
+ * @cpu: The cpu of the ring_buffer to get the number of pages from
+ *
+ * Returns the number of pages that have content in the ring buffer.
+ */
+size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu)
+{
+	size_t read;
+	size_t cnt;
+
+	read = local_read(&buffer->buffers[cpu]->pages_read);
+	cnt = local_read(&buffer->buffers[cpu]->pages_touched);
+	/* The reader can read an empty page, but not more than that */
+	if (cnt < read) {
+		WARN_ON_ONCE(read > cnt + 1);
+		return 0;
+	}
+
+	return cnt - read;
+}
+
 /*
  * rb_wake_up_waiters - wake up tasks waiting for ring buffer input
  *
@@ -556,7 +594,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full)
 {
 	struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
 	DEFINE_WAIT(wait);
@@ -571,7 +609,7 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
 	if (cpu == RING_BUFFER_ALL_CPUS) {
 		work = &buffer->irq_work;
 		/* Full only makes sense on per cpu reads */
-		full = false;
+		full = 0;
 	} else {
 		if (!cpumask_test_cpu(cpu, buffer->cpumask))
 			return -ENODEV;
@@ -623,15 +661,22 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
 		    !ring_buffer_empty_cpu(buffer, cpu)) {
 			unsigned long flags;
 			bool pagebusy;
+			size_t nr_pages;
+			size_t dirty;
 
 			if (!full)
 				break;
 
 			raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 			pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+			nr_pages = cpu_buffer->nr_pages;
+			dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+			if (!cpu_buffer->shortest_full ||
+			    cpu_buffer->shortest_full < full)
+				cpu_buffer->shortest_full = full;
 			raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
-			if (!pagebusy)
+			if (!pagebusy &&
+			    (!nr_pages || (dirty * 100) > full * nr_pages))
 				break;
 		}
 
@@ -1054,6 +1099,7 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
 	old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
 	old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
 
+	local_inc(&cpu_buffer->pages_touched);
 	/*
 	 * Just make sure we have seen our old_write and synchronize
 	 * with any interrupts that come in.
@@ -2603,6 +2649,16 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
 	pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
 
 	if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
+		size_t nr_pages;
+		size_t dirty;
+		size_t full;
+
+		full = cpu_buffer->shortest_full;
+		nr_pages = cpu_buffer->nr_pages;
+		dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
+		if (full && nr_pages && (dirty * 100) <= full * nr_pages)
+			return;
+
 		cpu_buffer->irq_work.wakeup_full = true;
 		cpu_buffer->irq_work.full_waiters_pending = false;
 		/* irq_work_queue() supplies it's own memory barriers */
@@ -3732,13 +3788,15 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 		goto spin;
 
 	/*
-	 * Yeah! We succeeded in replacing the page.
+	 * Yay! We succeeded in replacing the page.
 	 *
 	 * Now make the new head point back to the reader page.
 	 */
 	rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
 	rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
 
+	local_inc(&cpu_buffer->pages_read);
+
 	/* Finally update the reader page to the new head */
 	cpu_buffer->reader_page = reader;
 	cpu_buffer->reader_page->read = 0;
@@ -4334,6 +4392,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	local_set(&cpu_buffer->entries, 0);
 	local_set(&cpu_buffer->committing, 0);
 	local_set(&cpu_buffer->commits, 0);
+	local_set(&cpu_buffer->pages_touched, 0);
+	local_set(&cpu_buffer->pages_read, 0);
+	cpu_buffer->shortest_full = 0;
 	cpu_buffer->read = 0;
 	cpu_buffer->read_bytes = 0;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ff1c4b20cd0a..48d5eb22ff33 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1431,7 +1431,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
-static int wait_on_pipe(struct trace_iterator *iter, bool full)
+static int wait_on_pipe(struct trace_iterator *iter, int full)
 {
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
@@ -5693,7 +5693,7 @@ static int tracing_wait_pipe(struct file *filp)
 
 		mutex_unlock(&iter->mutex);
 
-		ret = wait_on_pipe(iter, false);
+		ret = wait_on_pipe(iter, 0);
 
 		mutex_lock(&iter->mutex);
 
@@ -6751,7 +6751,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 			if ((filp->f_flags & O_NONBLOCK))
 				return -EAGAIN;
 
-			ret = wait_on_pipe(iter, false);
+			ret = wait_on_pipe(iter, 0);
 			if (ret)
 				return ret;
 
@@ -6948,7 +6948,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
 			goto out;
 
-		ret = wait_on_pipe(iter, true);
+		ret = wait_on_pipe(iter, 1);
 		if (ret)
 			goto out;
 
-- 
2.19.1



  parent reply	other threads:[~2018-12-05 23:49 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-05 23:47 [for-next][PATCH 00/30] tracing: Updates for the next merge window Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 01/30] function_graph: Remove unused task_curr_ret_stack() Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 02/30] tracing: Do not line wrap short line in function_graph_enter() Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 03/30] fgraph: Create a fgraph.c file to store function graph infrastructure Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 04/30] fgraph: Have set_graph_notrace only affect function_graph tracer Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 05/30] arm64: function_graph: Remove use of FTRACE_NOTRACE_DEPTH Steven Rostedt
2018-12-06  3:55   ` Steven Rostedt
2018-12-06 15:49   ` Will Deacon
2018-12-06 15:55     ` Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 06/30] function_graph: Remove the " Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 07/30] ftrace: Create new ftrace_internal.h header Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 08/30] function_graph: Do not expose the graph_time option when profiler is not configured Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 09/30] fgraph: Move function graph specific code into fgraph.c Steven Rostedt
2018-12-05 23:47 ` [for-next][PATCH 10/30] tracing: Rearrange functions in trace_sched_wakeup.c Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 11/30] fgraph: Add new fgraph_ops structure to enable function graph hooks Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 12/30] function_graph: Move ftrace_graph_ret_addr() to fgraph.c Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 13/30] function_graph: Have profiler use new helper ftrace_graph_get_ret_stack() Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 14/30] tracing: Have trace_stack nr_entries compare not be so subtle Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 15/30] scripts/recordmcount.{c,pl}: support -ffunction-sections .text.* section names Steven Rostedt
2018-12-05 23:48 ` Steven Rostedt [this message]
2018-12-05 23:48 ` [for-next][PATCH 17/30] tracing: Add tracefs file buffer_percentage Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 18/30] tracing: Change default buffer_percent to 50 Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 19/30] tracing/uprobes: Add busy check when cleanup all uprobes Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 20/30] tracing: Lock event_mutex before synth_event_mutex Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 21/30] tracing: Simplify creation and deletion of synthetic events Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 22/30] tracing: Integrate similar probe argument parsers Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 23/30] tracing: Add unified dynamic event framework Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 24/30] tracing/kprobes: Use dyn_event framework for kprobe events Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 25/30] tracing/uprobes: Use dyn_event framework for uprobe events Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 26/30] tracing: Use dyn_event framework for synthetic events Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 27/30] tracing: Remove unneeded synth_event_mutex Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 28/30] tracing: Consolidate trace_add/remove_event_call back to the nolock functions Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 29/30] tracing: Add generic event-name based remove event method Steven Rostedt
2018-12-05 23:48 ` [for-next][PATCH 30/30] selftests/ftrace: Add testcases for dynamic event Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181205234830.758310141@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=ravi.bangoria@linux.vnet.ibm.com \
    --cc=zanussi@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox