public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [1/4] standardize bit waiting data type
  2004-08-28  5:26 ` [0/4] standardized waitqueue hashing William Lee Irwin III
@ 2004-08-28  5:31   ` William Lee Irwin III
  2004-08-28  6:17     ` Andrew Morton
  2004-08-28  9:18     ` Christoph Hellwig
  0 siblings, 2 replies; 11+ messages in thread
From: William Lee Irwin III @ 2004-08-28  5:31 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Oleg Nesterov, linux-kernel

On Fri, Aug 27, 2004 at 10:26:27PM -0700, William Lee Irwin III wrote:
> The following patch series consolidates the various instances of
> waitqueue hashing to use a uniform structure and share the per-zone
> hashtable among all waitqueue hashers. This is expected to increase the
> number of hashtable buckets available for waiting on bh's and inodes
> and eliminate statically allocated kernel data structures for greater
> node locality and reduced kernel image size. Some attempt was made to
> look similar to Oleg Nesterov's suggested API in order to provide some
> kind of credit for independent invention of something very similar (the
> original versions of these patches predated my public postings on the
> subject of filtered waitqueues).

Eliminate specialized page and bh waitqueue hashing structures in favor
of a standardized structure, using wake_up_bit() to wake waiters using
the standardized wait_bit_key structure.

Index: mm1-2.6.9-rc1/fs/buffer.c
===================================================================
--- mm1-2.6.9-rc1.orig/fs/buffer.c	2004-08-26 15:04:08.000000000 -0700
+++ mm1-2.6.9-rc1/fs/buffer.c	2004-08-27 22:02:17.837872264 -0700
@@ -43,26 +43,6 @@
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
-struct bh_wait_queue {
-	struct buffer_head *bh;
-	wait_queue_t wait;
-};
-
-#define __DEFINE_BH_WAIT(name, b, f)					\
-	struct bh_wait_queue name = {					\
-		.bh	= b,						\
-		.wait	= {						\
-				.task	= current,			\
-				.flags	= f,				\
-				.func	= bh_wake_function,		\
-				.task_list =				\
-					LIST_HEAD_INIT(name.wait.task_list),\
-			},						\
-	}
-#define DEFINE_BH_WAIT(name, bh)	__DEFINE_BH_WAIT(name, bh, 0)
-#define DEFINE_BH_WAIT_EXCLUSIVE(name, bh) \
-		__DEFINE_BH_WAIT(name, bh, WQ_FLAG_EXCLUSIVE)
-
 /*
  * Hashed waitqueue_head's for wait_on_buffer()
  */
@@ -93,24 +73,10 @@
 	wait_queue_head_t *wq = bh_waitq_head(bh);
 
 	smp_mb();
-	if (waitqueue_active(wq))
-		__wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, bh);
+	__wake_up_bit(wq, &bh->b_state, BH_Lock);
 }
 EXPORT_SYMBOL(wake_up_buffer);
 
-static int bh_wake_function(wait_queue_t *wait, unsigned mode,
-				int sync, void *key)
-{
-	struct buffer_head *bh = key;
-	struct bh_wait_queue *wq;
-
-	wq = container_of(wait, struct bh_wait_queue, wait);
-	if (wq->bh != bh || buffer_locked(bh))
-		return 0;
-	else
-		return autoremove_wake_function(wait, mode, sync, key);
-}
-
 static void sync_buffer(struct buffer_head *bh)
 {
 	struct block_device *bd;
@@ -124,7 +90,7 @@
 void fastcall __lock_buffer(struct buffer_head *bh)
 {
 	wait_queue_head_t *wqh = bh_waitq_head(bh);
-	DEFINE_BH_WAIT_EXCLUSIVE(wait, bh);
+	DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock);
 
 	do {
 		prepare_to_wait_exclusive(wqh, &wait.wait,
@@ -153,15 +119,13 @@
 void __wait_on_buffer(struct buffer_head * bh)
 {
 	wait_queue_head_t *wqh = bh_waitq_head(bh);
-	DEFINE_BH_WAIT(wait, bh);
+	DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock);
 
-	do {
-		prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
-		if (buffer_locked(bh)) {
-			sync_buffer(bh);
-			io_schedule();
-		}
-	} while (buffer_locked(bh));
+	prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
+	if (buffer_locked(bh)) {
+		sync_buffer(bh);
+		io_schedule();
+	}
 	finish_wait(wqh, &wait.wait);
 }
 
Index: mm1-2.6.9-rc1/kernel/fork.c
===================================================================
--- mm1-2.6.9-rc1.orig/kernel/fork.c	2004-08-26 15:04:12.000000000 -0700
+++ mm1-2.6.9-rc1/kernel/fork.c	2004-08-27 22:02:13.647509296 -0700
@@ -227,6 +227,29 @@
 
 EXPORT_SYMBOL(autoremove_wake_function);
 
+int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue *wait_bit =
+		container_of(wait, struct wait_bit_queue, wait);
+
+	if (wait_bit->key.flags != key->flags ||
+			wait_bit->key.bit_nr != key->bit_nr ||
+			test_bit(key->bit_nr, key->flags))
+		return 0;
+	else
+		return autoremove_wake_function(wait, mode, sync, key);
+}
+EXPORT_SYMBOL(wake_bit_function);
+
+void fastcall __wake_up_bit(wait_queue_head_t *wq, unsigned long *word, int bit)
+{
+	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key);
+}
+EXPORT_SYMBOL(__wake_up_bit);
+
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
Index: mm1-2.6.9-rc1/mm/filemap.c
===================================================================
--- mm1-2.6.9-rc1.orig/mm/filemap.c	2004-08-27 09:11:29.000000000 -0700
+++ mm1-2.6.9-rc1/mm/filemap.c	2004-08-27 22:02:05.772706448 -0700
@@ -355,40 +355,6 @@
  * at a cost of "thundering herd" phenomena during rare hash
  * collisions.
  */
-struct page_wait_queue {
-	struct page *page;
-	int bit;
-	wait_queue_t wait;
-};
-
-static int page_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
-{
-	struct page *page = key;
-	struct page_wait_queue *wq;
-
-	wq = container_of(wait, struct page_wait_queue, wait);
-	if (wq->page != page || test_bit(wq->bit, &page->flags))
-		return 0;
-	else
-		return autoremove_wake_function(wait, mode, sync, NULL);
-}
-
-#define __DEFINE_PAGE_WAIT(name, p, b, f)				\
-	struct page_wait_queue name = {					\
-		.page	= p,						\
-		.bit	= b,						\
-		.wait	= {						\
-			.task	= current,				\
-			.func	= page_wake_function,			\
-			.flags	= f,					\
-			.task_list = LIST_HEAD_INIT(name.wait.task_list),\
-		},							\
-	}
-
-#define DEFINE_PAGE_WAIT(name, p, b)	__DEFINE_PAGE_WAIT(name, p, b, 0)
-#define DEFINE_PAGE_WAIT_EXCLUSIVE(name, p, b)				\
-		__DEFINE_PAGE_WAIT(name, p, b, WQ_FLAG_EXCLUSIVE)
-
 static wait_queue_head_t *page_waitqueue(struct page *page)
 {
 	const struct zone *zone = page_zone(page);
@@ -396,27 +362,16 @@
 	return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
 }
 
-static void wake_up_page(struct page *page)
-{
-	const unsigned int mode = TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE;
-	wait_queue_head_t *waitqueue = page_waitqueue(page);
-
-	if (waitqueue_active(waitqueue))
-		__wake_up(waitqueue, mode, 1, page);
-}
-
 void fastcall wait_on_page_bit(struct page *page, int bit_nr)
 {
 	wait_queue_head_t *waitqueue = page_waitqueue(page);
-	DEFINE_PAGE_WAIT(wait, page, bit_nr);
+	DEFINE_WAIT_BIT(wait, (unsigned long *)&page->flags, bit_nr);
 
-	do {
-		prepare_to_wait(waitqueue, &wait.wait, TASK_UNINTERRUPTIBLE);
-		if (test_bit(bit_nr, &page->flags)) {
-			sync_page(page);
-			io_schedule();
-		}
-	} while (test_bit(bit_nr, &page->flags));
+	prepare_to_wait(waitqueue, &wait.wait, TASK_UNINTERRUPTIBLE);
+	if (test_bit(bit_nr, &page->flags)) {
+		sync_page(page);
+		io_schedule();
+	}
 	finish_wait(waitqueue, &wait.wait);
 }
 
@@ -439,11 +394,12 @@
  */
 void fastcall unlock_page(struct page *page)
 {
+	unsigned long *word = (unsigned long *)&page->flags;
 	smp_mb__before_clear_bit();
 	if (!TestClearPageLocked(page))
 		BUG();
 	smp_mb__after_clear_bit(); 
-	wake_up_page(page);
+	__wake_up_bit(page_waitqueue(page), word, PG_locked);
 }
 
 EXPORT_SYMBOL(unlock_page);
@@ -454,12 +410,13 @@
  */
 void end_page_writeback(struct page *page)
 {
+	unsigned long *word = (unsigned long *)&page->flags;
 	if (!TestClearPageReclaim(page) || rotate_reclaimable_page(page)) {
 		if (!test_clear_page_writeback(page))
 			BUG();
 		smp_mb__after_clear_bit();
 	}
-	wake_up_page(page);
+	__wake_up_bit(page_waitqueue(page), word, PG_writeback);
 }
 
 EXPORT_SYMBOL(end_page_writeback);
@@ -475,7 +432,7 @@
 void fastcall __lock_page(struct page *page)
 {
 	wait_queue_head_t *wqh = page_waitqueue(page);
-	DEFINE_PAGE_WAIT_EXCLUSIVE(wait, page, PG_locked);
+	DEFINE_WAIT_BIT(wait, (unsigned long *)&page->flags, PG_locked);
 
 	while (TestSetPageLocked(page)) {
 		prepare_to_wait_exclusive(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
Index: mm1-2.6.9-rc1/include/linux/wait.h
===================================================================
--- mm1-2.6.9-rc1.orig/include/linux/wait.h	2004-08-26 15:03:53.000000000 -0700
+++ mm1-2.6.9-rc1/include/linux/wait.h	2004-08-27 22:02:21.778273232 -0700
@@ -37,6 +37,16 @@
 	struct list_head task_list;
 };
 
+struct wait_bit_key {
+	unsigned long *flags;
+	int bit_nr;
+};
+
+struct wait_bit_queue {
+	struct wait_bit_key key;
+	wait_queue_t wait;
+};
+
 struct __wait_queue_head {
 	spinlock_t lock;
 	struct list_head task_list;
@@ -63,6 +73,9 @@
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
 	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
 
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
+	{ .flags = word, .bit_nr = bit, }
+
 static inline void init_waitqueue_head(wait_queue_head_t *q)
 {
 	q->lock = SPIN_LOCK_UNLOCKED;
@@ -125,6 +138,7 @@
 void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key));
 extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
 extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
+void FASTCALL(__wake_up_bit(wait_queue_head_t *, unsigned long *, int));
 
 #define wake_up(x)			__wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr, NULL)
@@ -277,6 +291,7 @@
 				wait_queue_t *wait, int state));
 void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait));
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 
 #define DEFINE_WAIT(name)						\
 	wait_queue_t name = {						\
@@ -287,6 +302,17 @@
 				},					\
 	}
 
+#define DEFINE_WAIT_BIT(name, word, bit)				\
+	struct wait_bit_queue name = {					\
+		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),		\
+		.wait	= {						\
+			.task		= current,			\
+			.func		= wake_bit_function,		\
+			.task_list	=				\
+				LIST_HEAD_INIT(name.wait.task_list),	\
+		},							\
+	}
+
 #define init_wait(wait)							\
 	do {								\
 		wait->task = current;					\

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [1/4] standardize bit waiting data type
  2004-08-28  5:31   ` [1/4] standardize bit waiting data type William Lee Irwin III
@ 2004-08-28  6:17     ` Andrew Morton
  2004-08-28  6:34       ` William Lee Irwin III
  2004-08-28  9:18     ` Christoph Hellwig
  1 sibling, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2004-08-28  6:17 UTC (permalink / raw)
  To: William Lee Irwin III; +Cc: oleg, linux-kernel

William Lee Irwin III <wli@holomorphy.com> wrote:
>
>  void fastcall unlock_page(struct page *page)
>   {
>  +	unsigned long *word = (unsigned long *)&page->flags;

This will break if a little-endian 64-bit architecture elects to use a
32-bit page_flags_t.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [1/4] standardize bit waiting data type
  2004-08-28  6:17     ` Andrew Morton
@ 2004-08-28  6:34       ` William Lee Irwin III
  2004-08-28  6:40         ` Andrew Morton
  0 siblings, 1 reply; 11+ messages in thread
From: William Lee Irwin III @ 2004-08-28  6:34 UTC (permalink / raw)
  To: Andrew Morton; +Cc: oleg, linux-kernel

William Lee Irwin III <wli@holomorphy.com> wrote:
>> void fastcall unlock_page(struct page *page)
>>  {
>> +	unsigned long *word = (unsigned long *)&page->flags;

On Fri, Aug 27, 2004 at 11:17:13PM -0700, Andrew Morton wrote:
> This will break if a little-endian 64-bit architecture elects to use a
> 32-bit page_flags_t.

You mean a big-endian one? I did check to be sure none did so; only
x86-64 does. Easy enough to dress up so BE arches can do it too.


-- wli

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [1/4] standardize bit waiting data type
  2004-08-28  6:34       ` William Lee Irwin III
@ 2004-08-28  6:40         ` Andrew Morton
  2004-08-28  6:48           ` William Lee Irwin III
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2004-08-28  6:40 UTC (permalink / raw)
  To: William Lee Irwin III; +Cc: oleg, linux-kernel

William Lee Irwin III <wli@holomorphy.com> wrote:
>
> William Lee Irwin III <wli@holomorphy.com> wrote:
> >> void fastcall unlock_page(struct page *page)
> >>  {
> >> +	unsigned long *word = (unsigned long *)&page->flags;
> 
> On Fri, Aug 27, 2004 at 11:17:13PM -0700, Andrew Morton wrote:
> > This will break if a little-endian 64-bit architecture elects to use a
> > 32-bit page_flags_t.
> 
> You mean a big-endian one? I did check to be sure none did so; only
> x86-64 does. Easy enough to dress up so BE arches can do it too.
> 

hm.  Actually, the page_flags_t hack can only work on little-endian
hardware anyway.

perhaps your implementation should imitate x86_64/bitops.h and use a void*,
along with apologetic comments.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [1/4] standardize bit waiting data type
  2004-08-28  6:40         ` Andrew Morton
@ 2004-08-28  6:48           ` William Lee Irwin III
  2004-08-28  9:20             ` William Lee Irwin III
  0 siblings, 1 reply; 11+ messages in thread
From: William Lee Irwin III @ 2004-08-28  6:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: oleg, linux-kernel

William Lee Irwin III <wli@holomorphy.com> wrote:
>> You mean a big-endian one? I did check to be sure none did so; only
>> x86-64 does. Easy enough to dress up so BE arches can do it too.

On Fri, Aug 27, 2004 at 11:40:33PM -0700, Andrew Morton wrote:
> hm.  Actually, the page_flags_t hack can only work on little-endian
> hardware anyway.
> perhaps your implementation should imitate x86_64/bitops.h and use a void*,
> along with apologetic comments.

Okay, I'll resend it done that way.


-- wli

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [1/4] standardize bit waiting data type
  2004-08-28  5:31   ` [1/4] standardize bit waiting data type William Lee Irwin III
  2004-08-28  6:17     ` Andrew Morton
@ 2004-08-28  9:18     ` Christoph Hellwig
  2004-08-28  9:20       ` William Lee Irwin III
  1 sibling, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2004-08-28  9:18 UTC (permalink / raw)
  To: William Lee Irwin III, Andrew Morton, Oleg Nesterov, linux-kernel


Why __wake_up_bit, not wake_up_bit?


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [1/4] standardize bit waiting data type
  2004-08-28  6:48           ` William Lee Irwin III
@ 2004-08-28  9:20             ` William Lee Irwin III
  0 siblings, 0 replies; 11+ messages in thread
From: William Lee Irwin III @ 2004-08-28  9:20 UTC (permalink / raw)
  To: Andrew Morton; +Cc: oleg, linux-kernel

On Fri, Aug 27, 2004 at 11:40:33PM -0700, Andrew Morton wrote:
>> hm.  Actually, the page_flags_t hack can only work on little-endian
>> hardware anyway.
>> perhaps your implementation should imitate x86_64/bitops.h and use a void*,
>> along with apologetic comments.

On Fri, Aug 27, 2004 at 11:48:29PM -0700, William Lee Irwin III wrote:
> Okay, I'll resend it done that way.

I didn't see very many apologies in x86-64 so I wasn't entirely sure
what those would consist of; void * and some minor commentary added.
Let me know if there's something more specific I should add comments
about.

Thanks.


-- wli

Eliminate specialized page and bh waitqueue hashing structures in favor
of a standardized structure, using wake_up_bit() to wake waiters using
the standardized wait_bit_key structure.

Index: mm1-2.6.9-rc1/fs/buffer.c
===================================================================
--- mm1-2.6.9-rc1.orig/fs/buffer.c	2004-08-26 15:04:08.000000000 -0700
+++ mm1-2.6.9-rc1/fs/buffer.c	2004-08-27 23:50:07.644312184 -0700
@@ -43,26 +43,6 @@
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
-struct bh_wait_queue {
-	struct buffer_head *bh;
-	wait_queue_t wait;
-};
-
-#define __DEFINE_BH_WAIT(name, b, f)					\
-	struct bh_wait_queue name = {					\
-		.bh	= b,						\
-		.wait	= {						\
-				.task	= current,			\
-				.flags	= f,				\
-				.func	= bh_wake_function,		\
-				.task_list =				\
-					LIST_HEAD_INIT(name.wait.task_list),\
-			},						\
-	}
-#define DEFINE_BH_WAIT(name, bh)	__DEFINE_BH_WAIT(name, bh, 0)
-#define DEFINE_BH_WAIT_EXCLUSIVE(name, bh) \
-		__DEFINE_BH_WAIT(name, bh, WQ_FLAG_EXCLUSIVE)
-
 /*
  * Hashed waitqueue_head's for wait_on_buffer()
  */
@@ -93,24 +73,10 @@
 	wait_queue_head_t *wq = bh_waitq_head(bh);
 
 	smp_mb();
-	if (waitqueue_active(wq))
-		__wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, bh);
+	__wake_up_bit(wq, &bh->b_state, BH_Lock);
 }
 EXPORT_SYMBOL(wake_up_buffer);
 
-static int bh_wake_function(wait_queue_t *wait, unsigned mode,
-				int sync, void *key)
-{
-	struct buffer_head *bh = key;
-	struct bh_wait_queue *wq;
-
-	wq = container_of(wait, struct bh_wait_queue, wait);
-	if (wq->bh != bh || buffer_locked(bh))
-		return 0;
-	else
-		return autoremove_wake_function(wait, mode, sync, key);
-}
-
 static void sync_buffer(struct buffer_head *bh)
 {
 	struct block_device *bd;
@@ -124,7 +90,7 @@
 void fastcall __lock_buffer(struct buffer_head *bh)
 {
 	wait_queue_head_t *wqh = bh_waitq_head(bh);
-	DEFINE_BH_WAIT_EXCLUSIVE(wait, bh);
+	DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock);
 
 	do {
 		prepare_to_wait_exclusive(wqh, &wait.wait,
@@ -153,15 +119,13 @@
 void __wait_on_buffer(struct buffer_head * bh)
 {
 	wait_queue_head_t *wqh = bh_waitq_head(bh);
-	DEFINE_BH_WAIT(wait, bh);
+	DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock);
 
-	do {
-		prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
-		if (buffer_locked(bh)) {
-			sync_buffer(bh);
-			io_schedule();
-		}
-	} while (buffer_locked(bh));
+	prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
+	if (buffer_locked(bh)) {
+		sync_buffer(bh);
+		io_schedule();
+	}
 	finish_wait(wqh, &wait.wait);
 }
 
Index: mm1-2.6.9-rc1/kernel/fork.c
===================================================================
--- mm1-2.6.9-rc1.orig/kernel/fork.c	2004-08-26 15:04:12.000000000 -0700
+++ mm1-2.6.9-rc1/kernel/fork.c	2004-08-28 01:20:04.105925320 -0700
@@ -227,6 +227,29 @@
 
 EXPORT_SYMBOL(autoremove_wake_function);
 
+int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
+{
+	struct wait_bit_key *key = arg;
+	struct wait_bit_queue *wait_bit
+		= container_of(wait, struct wait_bit_queue, wait);
+
+	if (wait_bit->key.flags != key->flags ||
+			wait_bit->key.bit_nr != key->bit_nr ||
+			test_bit(key->bit_nr, key->flags))
+		return 0;
+	else
+		return autoremove_wake_function(wait, mode, sync, key);
+}
+EXPORT_SYMBOL(wake_bit_function);
+
+void fastcall __wake_up_bit(wait_queue_head_t *wq, void *word, int bit)
+{
+	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_INTERRUPTIBLE|TASK_UNINTERRUPTIBLE, 1, &key);
+}
+EXPORT_SYMBOL(__wake_up_bit);
+
 void __init fork_init(unsigned long mempages)
 {
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
Index: mm1-2.6.9-rc1/mm/filemap.c
===================================================================
--- mm1-2.6.9-rc1.orig/mm/filemap.c	2004-08-27 09:11:29.000000000 -0700
+++ mm1-2.6.9-rc1/mm/filemap.c	2004-08-28 01:19:28.658314176 -0700
@@ -355,40 +355,6 @@
  * at a cost of "thundering herd" phenomena during rare hash
  * collisions.
  */
-struct page_wait_queue {
-	struct page *page;
-	int bit;
-	wait_queue_t wait;
-};
-
-static int page_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
-{
-	struct page *page = key;
-	struct page_wait_queue *wq;
-
-	wq = container_of(wait, struct page_wait_queue, wait);
-	if (wq->page != page || test_bit(wq->bit, &page->flags))
-		return 0;
-	else
-		return autoremove_wake_function(wait, mode, sync, NULL);
-}
-
-#define __DEFINE_PAGE_WAIT(name, p, b, f)				\
-	struct page_wait_queue name = {					\
-		.page	= p,						\
-		.bit	= b,						\
-		.wait	= {						\
-			.task	= current,				\
-			.func	= page_wake_function,			\
-			.flags	= f,					\
-			.task_list = LIST_HEAD_INIT(name.wait.task_list),\
-		},							\
-	}
-
-#define DEFINE_PAGE_WAIT(name, p, b)	__DEFINE_PAGE_WAIT(name, p, b, 0)
-#define DEFINE_PAGE_WAIT_EXCLUSIVE(name, p, b)				\
-		__DEFINE_PAGE_WAIT(name, p, b, WQ_FLAG_EXCLUSIVE)
-
 static wait_queue_head_t *page_waitqueue(struct page *page)
 {
 	const struct zone *zone = page_zone(page);
@@ -396,27 +362,16 @@
 	return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
 }
 
-static void wake_up_page(struct page *page)
-{
-	const unsigned int mode = TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE;
-	wait_queue_head_t *waitqueue = page_waitqueue(page);
-
-	if (waitqueue_active(waitqueue))
-		__wake_up(waitqueue, mode, 1, page);
-}
-
 void fastcall wait_on_page_bit(struct page *page, int bit_nr)
 {
 	wait_queue_head_t *waitqueue = page_waitqueue(page);
-	DEFINE_PAGE_WAIT(wait, page, bit_nr);
+	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 
-	do {
-		prepare_to_wait(waitqueue, &wait.wait, TASK_UNINTERRUPTIBLE);
-		if (test_bit(bit_nr, &page->flags)) {
-			sync_page(page);
-			io_schedule();
-		}
-	} while (test_bit(bit_nr, &page->flags));
+	prepare_to_wait(waitqueue, &wait.wait, TASK_UNINTERRUPTIBLE);
+	if (test_bit(bit_nr, &page->flags)) {
+		sync_page(page);
+		io_schedule();
+	}
 	finish_wait(waitqueue, &wait.wait);
 }
 
@@ -443,7 +398,7 @@
 	if (!TestClearPageLocked(page))
 		BUG();
 	smp_mb__after_clear_bit(); 
-	wake_up_page(page);
+	__wake_up_bit(page_waitqueue(page), &page->flags, PG_locked);
 }
 
 EXPORT_SYMBOL(unlock_page);
@@ -459,7 +414,7 @@
 			BUG();
 		smp_mb__after_clear_bit();
 	}
-	wake_up_page(page);
+	__wake_up_bit(page_waitqueue(page), &page->flags, PG_writeback);
 }
 
 EXPORT_SYMBOL(end_page_writeback);
@@ -475,7 +430,7 @@
 void fastcall __lock_page(struct page *page)
 {
 	wait_queue_head_t *wqh = page_waitqueue(page);
-	DEFINE_PAGE_WAIT_EXCLUSIVE(wait, page, PG_locked);
+	DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
 	while (TestSetPageLocked(page)) {
 		prepare_to_wait_exclusive(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
Index: mm1-2.6.9-rc1/include/linux/wait.h
===================================================================
--- mm1-2.6.9-rc1.orig/include/linux/wait.h	2004-08-26 15:03:53.000000000 -0700
+++ mm1-2.6.9-rc1/include/linux/wait.h	2004-08-27 23:51:01.407138992 -0700
@@ -37,6 +37,16 @@
 	struct list_head task_list;
 };
 
+struct wait_bit_key {
+	void *flags;
+	int bit_nr;
+};
+
+struct wait_bit_queue {
+	struct wait_bit_key key;
+	wait_queue_t wait;
+};
+
 struct __wait_queue_head {
 	spinlock_t lock;
 	struct list_head task_list;
@@ -63,6 +73,9 @@
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
 	wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
 
+#define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
+	{ .flags = word, .bit_nr = bit, }
+
 static inline void init_waitqueue_head(wait_queue_head_t *q)
 {
 	q->lock = SPIN_LOCK_UNLOCKED;
@@ -125,6 +138,7 @@
 void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key));
 extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode));
 extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
+void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int));
 
 #define wake_up(x)			__wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr, NULL)
@@ -277,6 +291,7 @@
 				wait_queue_t *wait, int state));
 void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait));
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
+int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 
 #define DEFINE_WAIT(name)						\
 	wait_queue_t name = {						\
@@ -287,6 +302,17 @@
 				},					\
 	}
 
+#define DEFINE_WAIT_BIT(name, word, bit)				\
+	struct wait_bit_queue name = {					\
+		.key = __WAIT_BIT_KEY_INITIALIZER(word, bit),		\
+		.wait	= {						\
+			.task		= current,			\
+			.func		= wake_bit_function,		\
+			.task_list	=				\
+				LIST_HEAD_INIT(name.wait.task_list),	\
+		},							\
+	}
+
 #define init_wait(wait)							\
 	do {								\
 		wait->task = current;					\

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [1/4] standardize bit waiting data type
  2004-08-28  9:18     ` Christoph Hellwig
@ 2004-08-28  9:20       ` William Lee Irwin III
  0 siblings, 0 replies; 11+ messages in thread
From: William Lee Irwin III @ 2004-08-28  9:20 UTC (permalink / raw)
  To: Christoph Hellwig, Andrew Morton, Oleg Nesterov, linux-kernel

On Sat, Aug 28, 2004 at 10:18:49AM +0100, Christoph Hellwig wrote:
> Why __wkae_up_bit, not wake_up_bit?

A wake_up_bit() meant to be used more generally is added later in the
series.


-- wli

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [1/4] standardize bit waiting data type
  2004-09-03  9:53               ` [1/4] standardize bit waiting data type Martin Wilck
@ 2004-09-03  9:42                 ` William Lee Irwin III
  2004-09-03  9:55                   ` William Lee Irwin III
  0 siblings, 1 reply; 11+ messages in thread
From: William Lee Irwin III @ 2004-09-03  9:42 UTC (permalink / raw)
  To: Martin Wilck; +Cc: linux-kernel

William Lee Irwin III wrote:
>>+	prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
>>+	if (buffer_locked(bh)) {
>>+		sync_buffer(bh);
>>+		io_schedule();
>>+	}
>> 	finish_wait(wqh, &wait.wait);
>> }

On Fri, Sep 03, 2004 at 11:53:55AM +0200, Martin Wilck wrote:
> Why don't you need a do..while loop any more ?
> There is also no loop in __wait_on_bit() in the completed patch series.

Part of the point of filtered waitqueues is to reestablish wake-one
semantics. This means two things:
(a) those waiting merely for a bit to clear with no need to set it,
	i.e. all they want is to know a transition from set to
	clear occurred, are only woken once and don't need to loop
	waking and sleeping
(b) Of those tasks waiting for a bit to clear so they can set it
	exclusively, only one needs to be woken, and after the first
	is woken, it promises to clear the bit again, so there is no
	need to wake more tasks.

These two aspects of wake-one semantics give it highly attractive
performance characteristics.


-- wli

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [1/4] standardize bit waiting data type
       [not found]             ` <2y7Hd-1aP-21@gated-at.bofh.it>
@ 2004-09-03  9:53               ` Martin Wilck
  2004-09-03  9:42                 ` William Lee Irwin III
  0 siblings, 1 reply; 11+ messages in thread
From: Martin Wilck @ 2004-09-03  9:53 UTC (permalink / raw)
  To: William Lee Irwin III, linux-kernel

William Lee Irwin III wrote:

> @@ -153,15 +119,13 @@
>  void __wait_on_buffer(struct buffer_head * bh)
>  {
>  	wait_queue_head_t *wqh = bh_waitq_head(bh);
> -	DEFINE_BH_WAIT(wait, bh);
> +	DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Lock);
>  
> -	do {
> -		prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
> -		if (buffer_locked(bh)) {
> -			sync_buffer(bh);
> -			io_schedule();
> -		}
> -	} while (buffer_locked(bh));
> +	prepare_to_wait(wqh, &wait.wait, TASK_UNINTERRUPTIBLE);
> +	if (buffer_locked(bh)) {
> +		sync_buffer(bh);
> +		io_schedule();
> +	}
>  	finish_wait(wqh, &wait.wait);
>  }

Why don't you need a do..while loop any more ?

There is also no loop in __wait_on_bit() in the completed patch series.

Cheers
Martin

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [1/4] standardize bit waiting data type
  2004-09-03  9:42                 ` William Lee Irwin III
@ 2004-09-03  9:55                   ` William Lee Irwin III
  0 siblings, 0 replies; 11+ messages in thread
From: William Lee Irwin III @ 2004-09-03  9:55 UTC (permalink / raw)
  To: Martin Wilck; +Cc: linux-kernel

On Fri, Sep 03, 2004 at 11:53:55AM +0200, Martin Wilck wrote:
>> Why don't you need a do..while loop any more ?
>> There is also no loop in __wait_on_bit() in the completed patch series.

On Fri, Sep 03, 2004 at 02:42:47AM -0700, William Lee Irwin III wrote:
> Part of the point of filtered waitqueues is to reestablish wake-one
> semantics. This means two things:
> (a) those waiting merely for a bit to clear with no need to set it,
> 	i.e. all they want is to know a transition from set to
> 	clear occurred, are only woken once and don't need to loop
> 	waking and sleeping
> (b) Of those tasks waiting for a bit to clear so they can set it
> 	exclusively, only one needs to be woken, and after the first
> 	is woken, it promises to clear the bit again, so there is no
> 	need to wake more tasks.

Also, (a) still works in the presence of signals with interruptible
waits (which the VM and VFS do not now use); the sleeping function is
required to return -EINTR or some other nonzero value to indicate
abnormal termination, which in turn must be checked by the caller.


-- wli

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2004-09-03 10:01 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <2xoKb-2Pa-27@gated-at.bofh.it>
     [not found] ` <2y3X5-73V-37@gated-at.bofh.it>
     [not found]   ` <2y46A-798-17@gated-at.bofh.it>
     [not found]     ` <2y4T1-7GM-17@gated-at.bofh.it>
     [not found]       ` <2y52E-7Li-11@gated-at.bofh.it>
     [not found]         ` <2y5ci-7Qz-7@gated-at.bofh.it>
     [not found]           ` <2y5m3-7VH-5@gated-at.bofh.it>
     [not found]             ` <2y7Hd-1aP-21@gated-at.bofh.it>
2004-09-03  9:53               ` [1/4] standardize bit waiting data type Martin Wilck
2004-09-03  9:42                 ` William Lee Irwin III
2004-09-03  9:55                   ` William Lee Irwin III
2004-08-26  8:47 2.6.9-rc1-mm1 Andrew Morton
2004-08-28  5:26 ` [0/4] standardized waitqueue hashing William Lee Irwin III
2004-08-28  5:31   ` [1/4] standardize bit waiting data type William Lee Irwin III
2004-08-28  6:17     ` Andrew Morton
2004-08-28  6:34       ` William Lee Irwin III
2004-08-28  6:40         ` Andrew Morton
2004-08-28  6:48           ` William Lee Irwin III
2004-08-28  9:20             ` William Lee Irwin III
2004-08-28  9:18     ` Christoph Hellwig
2004-08-28  9:20       ` William Lee Irwin III

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox