All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nick Piggin <piggin@cyberone.com.au>
To: Nick Piggin <piggin@cyberone.com.au>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
	William Lee Irwin III <wli@holomorphy.com>,
	Ingo Molnar <mingo@redhat.com>,
	Rusty Russell <rusty@rustcorp.com.au>,
	Anton Blanchard <anton@samba.org>,
	"Martin J. Bligh" <mbligh@aracnet.com>,
	"Nakajima, Jun" <jun.nakajima@intel.com>,
	Mark Wong <markw@osdl.org>
Subject: Re: [PATCH] improve rwsem scalability (was Re: [CFT][RFC] HT scheduler)
Date: Fri, 12 Dec 2003 13:02:37 +1100	[thread overview]
Message-ID: <3FD921BD.5040208@cyberone.com.au> (raw)
In-Reply-To: <3FD91F5D.30005@cyberone.com.au>

[-- Attachment #1: Type: text/plain, Size: 435 bytes --]



Nick Piggin wrote:

>
> The following patch moves the rwsem's up_read wake ups out of the
> wait_lock. Wakeups need to aquire a runqueue lock which is probably
> getting contended. The following graph is a best of 3 runs average.
> http://www.kerneltrap.org/~npiggin/rwsem.png
>
> The part to look at is the tail. I need to do some more testing to
> see if its significant.


Err, here's one with a comment changed to match, sorry.


[-- Attachment #2: rwsem-scale.patch --]
[-- Type: text/plain, Size: 3984 bytes --]


Move rwsem's up_read wakeups out of the semaphore's wait_lock


 linux-2.6-npiggin/lib/rwsem.c |   41 +++++++++++++++++++++++++++--------------
 1 files changed, 27 insertions(+), 14 deletions(-)

diff -puN lib/rwsem.c~rwsem-scale lib/rwsem.c
--- linux-2.6/lib/rwsem.c~rwsem-scale	2003-12-12 13:01:28.000000000 +1100
+++ linux-2.6-npiggin/lib/rwsem.c	2003-12-12 13:01:45.000000000 +1100
@@ -8,7 +8,7 @@
 #include <linux/module.h>
 
 struct rwsem_waiter {
-	struct list_head	list;
+	struct list_head	list, wake_list;
 	struct task_struct	*task;
 	unsigned int		flags;
 #define RWSEM_WAITING_FOR_READ	0x00000001
@@ -35,9 +35,12 @@ void rwsemtrace(struct rw_semaphore *sem
  * - the spinlock must be held by the caller
  * - woken process blocks are discarded from the list after having flags zeroised
  * - writers are only woken if wakewrite is non-zero
+ *
+ * The spinlock will be dropped by this function
  */
 static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 {
+	LIST_HEAD(wake_list);
 	struct rwsem_waiter *waiter;
 	struct list_head *next;
 	signed long oldcount;
@@ -65,7 +68,8 @@ static inline struct rw_semaphore *__rws
 
 	list_del(&waiter->list);
 	waiter->flags = 0;
-	wake_up_process(waiter->task);
+	if (list_empty(&waiter->wake_list))
+		list_add_tail(&waiter->wake_list, &wake_list);
 	goto out;
 
 	/* don't want to wake any writers */
@@ -74,9 +78,10 @@ static inline struct rw_semaphore *__rws
 	if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
 		goto out;
 
-	/* grant an infinite number of read locks to the readers at the front of the queue
-	 * - note we increment the 'active part' of the count by the number of readers (less one
-	 *   for the activity decrement we've already done) before waking any processes up
+	/* grant an infinite number of read locks to the readers at the front
+	 * of the queue - note we increment the 'active part' of the count by
+	 * the number of readers (less one for the activity decrement we've
+	 * already done) before waking any processes up
 	 */
  readers_only:
 	woken = 0;
@@ -100,13 +105,21 @@ static inline struct rw_semaphore *__rws
 		waiter = list_entry(next,struct rwsem_waiter,list);
 		next = waiter->list.next;
 		waiter->flags = 0;
-		wake_up_process(waiter->task);
+		if (list_empty(&waiter->wake_list))
+			list_add_tail(&waiter->wake_list, &wake_list);
 	}
 
 	sem->wait_list.next = next;
 	next->prev = &sem->wait_list;
 
  out:
+	spin_unlock(&sem->wait_lock);
+	while (!list_empty(&wake_list)) {
+		waiter = list_entry(wake_list.next,struct rwsem_waiter,wake_list);
+		list_del_init(&waiter->wake_list);
+		mb();
+		wake_up_process(waiter->task);
+	}
 	rwsemtrace(sem,"Leaving __rwsem_do_wake");
 	return sem;
 
@@ -130,9 +143,9 @@ static inline struct rw_semaphore *rwsem
 	set_task_state(tsk,TASK_UNINTERRUPTIBLE);
 
 	/* set up my own style of waitqueue */
-	spin_lock(&sem->wait_lock);
 	waiter->task = tsk;
-
+	INIT_LIST_HEAD(&waiter->wake_list);
+	spin_lock(&sem->wait_lock);
 	list_add_tail(&waiter->list,&sem->wait_list);
 
 	/* note that we're now waiting on the lock, but no longer actively read-locking */
@@ -143,8 +156,8 @@ static inline struct rw_semaphore *rwsem
 	 */
 	if (!(count & RWSEM_ACTIVE_MASK))
 		sem = __rwsem_do_wake(sem,1);
-
-	spin_unlock(&sem->wait_lock);
+	else
+		spin_unlock(&sem->wait_lock);
 
 	/* wait to be given the lock */
 	for (;;) {
@@ -204,8 +217,8 @@ struct rw_semaphore *rwsem_wake(struct r
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		sem = __rwsem_do_wake(sem,1);
-
-	spin_unlock(&sem->wait_lock);
+	else
+		spin_unlock(&sem->wait_lock);
 
 	rwsemtrace(sem,"Leaving rwsem_wake");
 
@@ -226,8 +239,8 @@ struct rw_semaphore *rwsem_downgrade_wak
 	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
 		sem = __rwsem_do_wake(sem,0);
-
-	spin_unlock(&sem->wait_lock);
+	else
+		spin_unlock(&sem->wait_lock);
 
 	rwsemtrace(sem,"Leaving rwsem_downgrade_wake");
 	return sem;

_

  reply	other threads:[~2003-12-12  2:20 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-12-08  4:25 [PATCH][RFC] make cpu_sibling_map a cpumask_t Nick Piggin
2003-12-08 15:59 ` Anton Blanchard
2003-12-08 23:08   ` Nick Piggin
2003-12-09  0:14     ` Anton Blanchard
2003-12-11  4:25       ` [CFT][RFC] HT scheduler Nick Piggin
2003-12-11  7:24         ` Nick Piggin
2003-12-11  8:57           ` Nick Piggin
2003-12-11 11:52             ` William Lee Irwin III
2003-12-11 13:09               ` Nick Piggin
2003-12-11 13:23                 ` William Lee Irwin III
2003-12-11 13:30                   ` Nick Piggin
2003-12-11 13:32                     ` William Lee Irwin III
2003-12-11 15:30                       ` Nick Piggin
2003-12-11 15:38                         ` William Lee Irwin III
2003-12-11 15:51                           ` Nick Piggin
2003-12-11 15:56                             ` William Lee Irwin III
2003-12-11 16:37                               ` Nick Piggin
2003-12-11 16:40                                 ` William Lee Irwin III
2003-12-12  1:52                         ` [PATCH] improve rwsem scalability (was Re: [CFT][RFC] HT scheduler) Nick Piggin
2003-12-12  2:02                           ` Nick Piggin [this message]
2003-12-12  9:41                           ` Ingo Molnar
2003-12-13  0:07                             ` Nick Piggin
2003-12-14  0:44                               ` Nick Piggin
2003-12-17  5:27                                 ` Nick Piggin
2003-12-19 11:52                                   ` Nick Piggin
2003-12-19 15:06                                     ` Martin J. Bligh
2003-12-20  0:08                                       ` Nick Piggin
2003-12-12  0:58             ` [CFT][RFC] HT scheduler Rusty Russell
2003-12-11 10:01           ` Rhino
2003-12-11  8:14             ` Nick Piggin
2003-12-11 16:49               ` Rhino
2003-12-11 15:16                 ` Nick Piggin
2003-12-11 11:40             ` William Lee Irwin III
2003-12-11 17:05               ` Rhino
2003-12-11 15:17                 ` William Lee Irwin III
2003-12-11 16:28         ` Kevin P. Fleming
2003-12-11 16:41           ` Nick Piggin
2003-12-12  2:24         ` Rusty Russell
2003-12-12  7:00           ` Nick Piggin
2003-12-12  7:23             ` Rusty Russell
2003-12-13  6:43               ` Nick Piggin
2003-12-14  1:35                 ` bill davidsen
2003-12-14  2:18                   ` Nick Piggin
2003-12-14  4:32                     ` Jamie Lokier
2003-12-14  9:40                       ` Nick Piggin
2003-12-14 10:46                         ` Arjan van de Ven
2003-12-16 17:46                         ` Bill Davidsen
2003-12-16 18:22                       ` Linus Torvalds
2003-12-17  0:24                         ` Davide Libenzi
2003-12-17  0:41                           ` Linus Torvalds
2003-12-17  0:54                             ` Davide Libenzi
2003-12-16 17:34                     ` Bill Davidsen
2003-12-15  5:53                 ` Rusty Russell
2003-12-15 23:08                   ` Nick Piggin
2003-12-19  4:57                     ` Nick Piggin
2003-12-19  5:13                       ` Nick Piggin
2003-12-20  2:43                       ` Rusty Russell
2003-12-21  2:56                         ` Nick Piggin
2004-01-03 18:57                   ` Bill Davidsen
2003-12-15 20:21                 ` Zwane Mwaikambo
2003-12-15 23:20                   ` Nick Piggin
2003-12-16  0:11                     ` Zwane Mwaikambo
2003-12-12  8:59             ` Nick Piggin
2003-12-12 15:14               ` Martin J. Bligh
2003-12-08 19:44 ` [PATCH][RFC] make cpu_sibling_map a cpumask_t James Cleverdon
2003-12-08 20:38 ` Ingo Molnar
2003-12-08 20:51 ` Zwane Mwaikambo
2003-12-08 20:55   ` Ingo Molnar
2003-12-08 23:17     ` Nick Piggin
2003-12-08 23:36       ` Ingo Molnar
2003-12-08 23:58         ` Nick Piggin
2003-12-08 23:46 ` Rusty Russell
2003-12-09 13:36   ` Nick Piggin
2003-12-11 21:41     ` bill davidsen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3FD921BD.5040208@cyberone.com.au \
    --to=piggin@cyberone.com.au \
    --cc=anton@samba.org \
    --cc=jun.nakajima@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=markw@osdl.org \
    --cc=mbligh@aracnet.com \
    --cc=mingo@redhat.com \
    --cc=rusty@rustcorp.com.au \
    --cc=wli@holomorphy.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.