public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
To: Steven Rostedt <rostedt@goodmis.org>,
	LKML <linux-kernel@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@elte.hu>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Christoph Hellwig <hch@lst.de>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Li Zefan <lizf@cn.fujitsu.com>,
	Lai Jiangshan <laijs@cn.fujitsu.com>,
	Johannes Berg <johannes.berg@intel.com>,
	Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>,
	Arnaldo Carvalho de Melo <acme@infradead.org>,
	Tom Zanussi <tzanussi@gmail.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	Andi Kleen <andi@firstfloor.org>,
	Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>,
	William Lee Irwin III <wli@holomorphy.com>
Subject: [patch 05/20] Poll : add poll_wait_set_exclusive
Date: Fri, 09 Jul 2010 18:57:32 -0400	[thread overview]
Message-ID: <20100709225815.911511969@efficios.com> (raw)
In-Reply-To: 20100709225727.312232266@efficios.com

[-- Attachment #1: poll-wait-exclusive.patch --]
[-- Type: text/plain, Size: 4238 bytes --]

Executive summary:

poll_wait_set_exclusive : set poll wait queue to exclusive

Sets up a poll wait queue to use exclusive wakeups. This is useful to
wake up only one waiter at each wakeup. Used to work-around "thundering herd"
problem.

Detail:

* Problem description :

In the ring buffer poll() implementation, a typical multithreaded user-space
buffer reader polls all per-cpu buffer descriptors for data.  The number of
reader threads can be user-defined; the motivation for permitting this is that
there are typical workloads where a single CPU is producing most of the tracing
data and all other CPUs are idle, available to consume data. It therefore makes
sense not to tie those threads to specific buffers. However, when the number of
threads grows, we face a "thundering herd" problem where many threads can be
woken up and put back to sleep, leaving only a single thread doing useful work.

* Solution :

Introduce a poll_wait_set_exclusive() primitive to poll API, so the code which
implements the pollfd operation can specify that only a single waiter must be
woken up.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: William Lee Irwin III <wli@holomorphy.com>
CC: Ingo Molnar <mingo@elte.hu>
---
 fs/select.c          |   41 ++++++++++++++++++++++++++++++++++++++---
 include/linux/poll.h |    2 ++
 2 files changed, 40 insertions(+), 3 deletions(-)

Index: linux.trees.git/fs/select.c
===================================================================
--- linux.trees.git.orig/fs/select.c	2010-07-09 15:59:00.000000000 -0400
+++ linux.trees.git/fs/select.c	2010-07-09 16:03:24.000000000 -0400
@@ -112,6 +112,9 @@ struct poll_table_page {
  */
 static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
 		       poll_table *p);
+static void __pollwait_exclusive(struct file *filp,
+				 wait_queue_head_t *wait_address,
+				 poll_table *p);
 
 void poll_initwait(struct poll_wqueues *pwq)
 {
@@ -152,6 +155,20 @@ void poll_freewait(struct poll_wqueues *
 }
 EXPORT_SYMBOL(poll_freewait);
 
+/**
+ * poll_wait_set_exclusive - set poll wait queue to exclusive
+ *
+ * Sets up a poll wait queue to use exclusive wakeups. This is useful to
+ * wake up only one waiter at each wakeup. Used to work-around "thundering herd"
+ * problem.
+ */
+void poll_wait_set_exclusive(poll_table *p)
+{
+	if (p)
+		init_poll_funcptr(p, __pollwait_exclusive);
+}
+EXPORT_SYMBOL(poll_wait_set_exclusive);
+
 static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
 {
 	struct poll_table_page *table = p->table;
@@ -213,8 +230,10 @@ static int pollwake(wait_queue_t *wait,
 }
 
 /* Add a new entry */
-static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
-				poll_table *p)
+static void __pollwait_common(struct file *filp,
+			      wait_queue_head_t *wait_address,
+			      poll_table *p,
+			      int exclusive)
 {
 	struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
 	struct poll_table_entry *entry = poll_get_entry(pwq);
@@ -226,7 +245,23 @@ static void __pollwait(struct file *filp
 	entry->key = p->key;
 	init_waitqueue_func_entry(&entry->wait, pollwake);
 	entry->wait.private = pwq;
-	add_wait_queue(wait_address, &entry->wait);
+	if (!exclusive)
+		add_wait_queue(wait_address, &entry->wait);
+	else
+		add_wait_queue_exclusive(wait_address, &entry->wait);
+}
+
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
+				poll_table *p)
+{
+	__pollwait_common(filp, wait_address, p, 0);
+}
+
+static void __pollwait_exclusive(struct file *filp,
+				 wait_queue_head_t *wait_address,
+				 poll_table *p)
+{
+	__pollwait_common(filp, wait_address, p, 1);
 }
 
 int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
Index: linux.trees.git/include/linux/poll.h
===================================================================
--- linux.trees.git.orig/include/linux/poll.h	2010-07-09 15:59:00.000000000 -0400
+++ linux.trees.git/include/linux/poll.h	2010-07-09 16:03:24.000000000 -0400
@@ -79,6 +79,8 @@ static inline int poll_schedule(struct p
 	return poll_schedule_timeout(pwq, state, NULL, 0);
 }
 
+extern void poll_wait_set_exclusive(poll_table *p);
+
 /*
  * Scaleable version of the fd_set.
  */


  parent reply	other threads:[~2010-07-09 23:42 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-09 22:57 [patch 00/20] Generic Ring Buffer Library Mathieu Desnoyers
2010-07-09 22:57 ` [patch 01/20] Create generic alignment API (v8) Mathieu Desnoyers
2010-08-06 11:41   ` Alexander Shishkin
2010-08-06 14:48     ` Mathieu Desnoyers
2010-07-09 22:57 ` [patch 02/20] notifier atomic call chain notrace Mathieu Desnoyers
2010-07-09 22:57 ` [patch 03/20] idle notifier standardization Mathieu Desnoyers
2010-07-09 22:57 ` [patch 04/20] idle notifier standardization x86_32 Mathieu Desnoyers
2010-07-09 22:57 ` Mathieu Desnoyers [this message]
2010-07-09 22:57 ` [patch 06/20] prio_heap: heap_remove(), heap_maximum(), heap_replace() and heap_cherrypick() Mathieu Desnoyers
2010-07-09 22:57 ` [patch 07/20] kthread_kill_stop() Mathieu Desnoyers
2010-07-09 22:57 ` [patch 08/20] inline memcpy Mathieu Desnoyers
2010-07-09 22:57 ` [patch 09/20] x86 " Mathieu Desnoyers
2010-07-09 22:57 ` [patch 10/20] Trace clock - build standalone Mathieu Desnoyers
2010-07-09 22:57 ` [patch 11/20] Ftrace ring buffer renaming Mathieu Desnoyers
2010-07-09 22:57 ` [patch 12/20] ring buffer backend Mathieu Desnoyers
2010-07-09 22:57 ` [patch 13/20] ring buffer frontend Mathieu Desnoyers
2010-07-09 22:57 ` [patch 14/20] Ring buffer library - documentation Mathieu Desnoyers
2010-07-09 22:57 ` [patch 15/20] Ring buffer library - VFS operations Mathieu Desnoyers
2010-07-09 22:57 ` [patch 16/20] Ring buffer library - client sample Mathieu Desnoyers
2010-07-09 22:57 ` [patch 17/20] Ring buffer benchmark library Mathieu Desnoyers
2010-07-09 22:57 ` [patch 18/20] Ring Buffer Record Iterator Mathieu Desnoyers
2010-07-09 22:57 ` [patch 19/20] Ring Buffer: Basic API Mathieu Desnoyers
2010-07-09 22:57 ` [patch 20/20] Ring buffer: benchmark simple API Mathieu Desnoyers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100709225815.911511969@efficios.com \
    --to=mathieu.desnoyers@efficios.com \
    --cc=acme@infradead.org \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=fweisbec@gmail.com \
    --cc=hch@lst.de \
    --cc=johannes.berg@intel.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=masami.hiramatsu.pt@hitachi.com \
    --cc=mathieu.desnoyers@polymtl.ca \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=tzanussi@gmail.com \
    --cc=wli@holomorphy.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox