public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@cambridge.redhat.com>
To: Linus Torvalds <torvalds@transmeta.com>,
	Andrew Morton <andrewm@uow.edu.au>, Ben LaHaise <bcrl@redhat.com>,
	Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: David Howells <dhowells@cambridge.redhat.com>,
	Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: [PATCH] i386 rw_semaphores fix
Date: Tue, 10 Apr 2001 19:02:42 +0100	[thread overview]
Message-ID: <11851.986925762@warthog.cambridge.redhat.com> (raw)
In-Reply-To: Your message of "Tue, 10 Apr 2001 08:47:34 BST." <8623.986888854@warthog.cambridge.redhat.com>

[-- Attachment #1: Type: text/plain, Size: 1154 bytes --]

Here's a patch that fixes RW semaphores on the i386 architecture. It is very
simple in the way it works.

The lock counter is dealt with as two semi-independent words: the LSW is the
number of active (granted) locks, and the MSW, if negated, is the number of
active writers (0 or 1) plus the number of waiting lockers of any type.

The fast paths are either two or three instructions long.

This algorithm should also be totally fair! Contentious lockers get put on the
back of the wait queue, and a waker function wakes them starting at the front,
but only wakes either one writer or the first consecutive bundle of readers.

The disadvantage is that the maximum number of active locks is 65535, and the
maximum number of waiting locks is 32766 (this can be extended to 65534 by not
relying on the S flag).

I've included a revised testing module (rwsem.c) that allows read locks to be
obtained as well as write locks and a revised driver program (driver.c) that
can use rwsem.c. Try the following tests:

	driver -200 & driver 200 # fork 200 writers and then 200 readers
	driver 200 & driver -200 # fork 200 readers and then 200 writers

David Howells

[-- Attachment #2: rw-semaphore patch --]
[-- Type: text/plain, Size: 22219 bytes --]

diff -uNr linux-2.4.3/arch/i386/kernel/semaphore.c linux/arch/i386/kernel/semaphore.c
--- linux-2.4.3/arch/i386/kernel/semaphore.c	Thu Apr  5 14:38:34 2001
+++ linux/arch/i386/kernel/semaphore.c	Tue Apr 10 18:23:55 2001
@@ -14,7 +14,6 @@
  */
 #include <linux/config.h>
 #include <linux/sched.h>
-
 #include <asm/semaphore.h>
 
 /*
@@ -237,19 +236,10 @@
 __down_read_failed:
 	pushl	%edx
 	pushl	%ecx
-	jnc	2f
-
-3:	call	down_read_failed_biased
-
-1:	popl	%ecx
+	call	down_read_failed
+	popl	%ecx
 	popl	%edx
 	ret
-
-2:	call	down_read_failed
-	" LOCK "subl	$1,(%eax)
-	jns	1b
-	jnc	2b
-	jmp	3b
 "
 );
 
@@ -260,169 +250,204 @@
 __down_write_failed:
 	pushl	%edx
 	pushl	%ecx
-	jnc	2f
-
-3:	call	down_write_failed_biased
-
-1:	popl	%ecx
+	call	down_write_failed
+	popl	%ecx
 	popl	%edx
 	ret
-
-2:	call	down_write_failed
-	" LOCK "subl	$" RW_LOCK_BIAS_STR ",(%eax)
-	jz	1b
-	jnc	2b
-	jmp	3b
 "
 );
 
-struct rw_semaphore *FASTCALL(rwsem_wake_readers(struct rw_semaphore *sem));
-struct rw_semaphore *FASTCALL(rwsem_wake_writer(struct rw_semaphore *sem));
+asm(
+"
+.align 4
+.globl __rwsem_wake
+__rwsem_wake:
+	pushl	%edx
+	pushl	%ecx
+	call	rwsem_wake
+	popl	%ecx
+	popl	%edx
+	ret
+"
+);
 
-struct rw_semaphore *FASTCALL(down_read_failed_biased(struct rw_semaphore *sem));
-struct rw_semaphore *FASTCALL(down_write_failed_biased(struct rw_semaphore *sem));
+struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *sem));
 struct rw_semaphore *FASTCALL(down_read_failed(struct rw_semaphore *sem));
 struct rw_semaphore *FASTCALL(down_write_failed(struct rw_semaphore *sem));
 
-struct rw_semaphore *down_read_failed_biased(struct rw_semaphore *sem)
-{
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
 
-	add_wait_queue(&sem->wait, &wait);	/* put ourselves at the head of the list */
-
-	for (;;) {
-		if (sem->read_bias_granted && xchg(&sem->read_bias_granted, 0))
-			break;
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		if (!sem->read_bias_granted)
-			schedule();
-	}
+static inline int atomic_add_and_read_orig(int delta, atomic_t *v)
+{
+	int oldmem;
 
-	remove_wait_queue(&sem->wait, &wait);
-	tsk->state = TASK_RUNNING;
+	__asm__ __volatile__(
+		LOCK_PREFIX "xadd %0,%2"
+		: "=r"(oldmem)
+		: "r0"(delta), "m"(*__xg(v))
+		: "memory");
 
-	return sem;
+	return oldmem;
 }
 
-struct rw_semaphore *down_write_failed_biased(struct rw_semaphore *sem)
+static inline int atomic_add_and_read(int delta, atomic_t *v)
 {
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
-
-	add_wait_queue_exclusive(&sem->write_bias_wait, &wait);	/* put ourselves at the end of the list */
-
-	for (;;) {
-		if (sem->write_bias_granted && xchg(&sem->write_bias_granted, 0))
-			break;
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		if (!sem->write_bias_granted)
-			schedule();
-	}
-
-	remove_wait_queue(&sem->write_bias_wait, &wait);
-	tsk->state = TASK_RUNNING;
+	return atomic_add_and_read_orig(delta,v)+delta;
+}
 
-	/* if the lock is currently unbiased, awaken the sleepers
-	 * FIXME: this wakes up the readers early in a bit of a
-	 * stampede -> bad!
-	 */
-	if (atomic_read(&sem->count) >= 0)
-		wake_up(&sem->wait);
+static inline int atomic_sub_and_read_orig(int delta, atomic_t *v)
+{
+	return atomic_add_and_read_orig(-delta,v);
+}
 
-	return sem;
+static inline int atomic_sub_and_read(int delta, atomic_t *v)
+{
+	return atomic_add_and_read_orig(-delta,v)-delta;
 }
 
-/* Wait for the lock to become unbiased.  Readers
- * are non-exclusive. =)
+/*
+ * wait for the read lock to be granted
+ * - need to repeal the increment made inline by the caller
+ * - need to throw a write-lock style spanner into the works (sub 0x00010000 from count)
  */
 struct rw_semaphore *down_read_failed(struct rw_semaphore *sem)
 {
 	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
+	DECLARE_WAITQUEUE(wait,tsk);
+	int count;
 
-	__up_read(sem);	/* this takes care of granting the lock */
+	rwsemdebug("[%d] Entering down_read_failed(%08x)\n",current->pid,atomic_read(&sem->count));
 
-	add_wait_queue(&sem->wait, &wait);
+	/* this waitqueue context flag will be cleared when we are granted the lock */
+	__set_bit(RWSEM_WAITING_FOR_READ,&wait.flags);
 
-	while (atomic_read(&sem->count) < 0) {
+	add_wait_queue_exclusive(&sem->wait, &wait); /* FIFO */
+
+	/* note that we're now waiting on the lock, but no longer actively read-locking */
+	count = atomic_add_and_read(RWSEM_WAITING_BIAS-RWSEM_ACTIVE_BIAS,&sem->count);
+	rwsemdebug("X(%08x)\n",count);
+
+	/* if there are no longer active locks, wake the front queued process(es) up
+	 * - it might even be this process, since the waker takes a more active part
+	 */
+	if (!(count & RWSEM_ACTIVE_MASK))
+		rwsem_wake(sem);
+
+	/* wait to be given the lock */
+	for (;;) {
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&sem->count) >= 0)
+		if (!test_bit(RWSEM_WAITING_FOR_READ,&wait.flags))
 			break;
 		schedule();
 	}
 
-	remove_wait_queue(&sem->wait, &wait);
+	remove_wait_queue(&sem->wait,&wait);
 	tsk->state = TASK_RUNNING;
 
+	rwsemdebug("[%d] Leaving down_read_failed(%08x)\n",current->pid,atomic_read(&sem->count));
+
 	return sem;
 }
 
-/* Wait for the lock to become unbiased. Since we're
- * a writer, we'll make ourselves exclusive.
+/*
+ * wait for the write lock to be granted
  */
 struct rw_semaphore *down_write_failed(struct rw_semaphore *sem)
 {
 	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
+	DECLARE_WAITQUEUE(wait,tsk);
+	int count;
 
-	__up_write(sem);	/* this takes care of granting the lock */
+	rwsemdebug("[%d] Entering down_write_failed(%08x)\n",
+		   current->pid,atomic_read(&sem->count));
 
-	add_wait_queue_exclusive(&sem->wait, &wait);
+	/* this waitqueue context flag will be cleared when we are granted the lock */
+	__set_bit(RWSEM_WAITING_FOR_WRITE,&wait.flags);
 
-	while (atomic_read(&sem->count) < 0) {
+	add_wait_queue_exclusive(&sem->wait, &wait); /* FIFO */
+
+	/* note that we're waiting on the lock, but no longer actively locking */
+	count = atomic_add_and_read(-RWSEM_ACTIVE_BIAS,&sem->count);
+	rwsemdebug("A(%08x)\n",count);
+
+	/* if there are no longer active locks, wake the front queued process(es) up
+	 * - it might even be this process, since the waker takes a more active part
+	 */
+	if (!(count & RWSEM_ACTIVE_MASK))
+		rwsem_wake(sem);
+
+	/* wait to be given the lock */
+	for (;;) {
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&sem->count) >= 0)
-			break;	/* we must attempt to acquire or bias the lock */
+		if (!test_bit(RWSEM_WAITING_FOR_WRITE,&wait.flags))
+			break;
 		schedule();
 	}
 
-	remove_wait_queue(&sem->wait, &wait);
+	remove_wait_queue(&sem->wait,&wait);
 	tsk->state = TASK_RUNNING;
 
+	rwsemdebug("[%d] Leaving down_write_failed(%08x)\n",current->pid,atomic_read(&sem->count));
+
 	return sem;
 }
 
-asm(
-"
-.align 4
-.globl __rwsem_wake
-__rwsem_wake:
-	pushl	%edx
-	pushl	%ecx
-
-	jz	1f
-	call	rwsem_wake_readers
-	jmp	2f
+/*
+ * handle the lock being released whilst there are processes blocked on it that can now run
+ * - if we come here, then:
+ *   - the 'active part' of the count (&0x0000ffff) reached zero (but may no longer be zero)
+ *   - the 'waiting part' of the count (&0xffff0000) is negative (and will still be so)
+ */
+struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+{
+	int count, woken;
 
-1:	call	rwsem_wake_writer
+	rwsemdebug("[%d] Entering rwsem_wake(%08x)\n",current->pid,atomic_read(&sem->count));
 
-2:	popl	%ecx
-	popl	%edx
-	ret
-"
-);
+	/* try to grab an 'activity' marker
+	 * - need to make sure two copies of rwsem_wake() don't do this for two separate processes
+	 *   simultaneously
+	 * - be horribly naughty, and only deal with the LSW of the atomic counter
+	 */
+	if (cmpxchg((__u16*)&sem->count.counter,0,RWSEM_ACTIVE_BIAS)!=0)
+		goto out;
 
-/* Called when someone has done an up that transitioned from
- * negative to non-negative, meaning that the lock has been
- * granted to whomever owned the bias.
- */
-struct rw_semaphore *rwsem_wake_readers(struct rw_semaphore *sem)
-{
-	if (xchg(&sem->read_bias_granted, 1))
+	/* try to grant a single write lock if there's a writer at the front of the queue
+	 * - note we leave the 'active part' of the count incremented by 1 and the waiting part
+	 *   incremented by 0x00010000
+	 */
+	switch (wake_up_ctx(&sem->wait,1,-RWSEM_WAITING_FOR_WRITE)) {
+	case 1:
+		goto out;
+	case 0:
+		break;
+	default:
 		BUG();
-	wake_up(&sem->wait);
-	return sem;
-}
+	}
 
-struct rw_semaphore *rwsem_wake_writer(struct rw_semaphore *sem)
-{
-	if (xchg(&sem->write_bias_granted, 1))
+	rwsemdebug("E\n");
+
+	/* grant an infinite number of read locks to the readers at the front of the queue
+	 * - note we increment the 'active part' of the count by the number of readers just woken,
+	 *   less one for the activity decrement we've already done
+	 */
+	woken = wake_up_ctx(&sem->wait,65535,-RWSEM_WAITING_FOR_READ);
+	if (woken>0) {
+		woken *= RWSEM_ACTIVE_BIAS-RWSEM_WAITING_BIAS;
+		woken -= RWSEM_ACTIVE_BIAS;
+		atomic_add(woken,&sem->count);
+	}
+	else {
 		BUG();
-	wake_up(&sem->write_bias_wait);
+	}
+
+ out:
+	rwsemdebug("[%d] Leaving rwsem_wake(%08x)\n",current->pid,atomic_read(&sem->count));
 	return sem;
 }
 
+/*
+ * rw spinlock fallbacks
+ */
 #if defined(CONFIG_SMP)
 asm(
 "
@@ -451,4 +476,3 @@
 "
 );
 #endif
-
diff -uNr linux-2.4.3/include/asm-i386/semaphore.h linux/include/asm-i386/semaphore.h
--- linux-2.4.3/include/asm-i386/semaphore.h	Thu Apr  5 14:50:36 2001
+++ linux/include/asm-i386/semaphore.h	Tue Apr 10 18:37:20 2001
@@ -208,31 +208,42 @@
  * spinlocks and semaphores together, and this is what we
  * end up with...
  *
- * The lock is initialized to BIAS.  This way, a writer
- * subtracts BIAS ands gets 0 for the case of an uncontended
- * lock.  Readers decrement by 1 and see a positive value
- * when uncontended, negative if there are writers waiting
- * (in which case it goes to sleep).
- *
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes.  BIAS must be chosen such that subl'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- *
- * In terms of fairness, this should result in the lock
- * flopping back and forth between readers and writers
- * under heavy use.
+ * The MSW of the count is the negated number of active writers and waiting
+ * lockers, and the LSW is the total number of active locks
  *
- *		-ben
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
+ * uncontended lock. This can be determined because XADD returns the old value.
+ * Readers increment by 1 and see a positive value when uncontended, negative
+ * if there are writers (and maybe) readers waiting (in which case it goes to
+ * sleep).
+ *
+ * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
+ * be extended to 65534 by manually checking the whole MSW rather than relying
+ * on the S flag.
+ *
+ * The value of ACTIVE_BIAS supports up to 65535 active processes.
+ *
+ * This should be totally fair - if anything is waiting, a process that wants a
+ * lock will go to the back of the queue. When the currently active lock is
+ * released, if there's a writer at the front of the queue, then that and only
+ * that will be woken up; if there's a bunch of consecutive readers at the
+ * front, then they'll all be woken up, but no other readers will be.
+ *
+ * [dhowells@redhat.com]
  */
 struct rw_semaphore {
 	atomic_t		count;
-	volatile unsigned char	write_bias_granted;
-	volatile unsigned char	read_bias_granted;
-	volatile unsigned char	pad1;
-	volatile unsigned char	pad2;
+#define RWSEM_UNLOCKED_VALUE		0x00000000
+#define RWSEM_ACTIVE_BIAS		0x00000001
+#define RWSEM_ACTIVE_MASK		0x0000ffff
+#define RWSEM_WAITING_BIAS		(-0x00010000)
+#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 	wait_queue_head_t	wait;
-	wait_queue_head_t	write_bias_wait;
+#define RWSEM_WAITING_FOR_READ	WQ_FLAG_CONTEXT_0	/* bits to use in wait_queue_t.flags */
+#define RWSEM_WAITING_FOR_WRITE	WQ_FLAG_CONTEXT_1
 #if WAITQUEUE_DEBUG
 	long			__magic;
 	atomic_t		readers;
@@ -240,6 +251,9 @@
 #endif
 };
 
+#define rwsemdebug(FMT,...)
+//#define rwsemdebug printk
+
 #if WAITQUEUE_DEBUG
 #define __RWSEM_DEBUG_INIT	, ATOMIC_INIT(0), ATOMIC_INIT(0)
 #else
@@ -247,8 +261,7 @@
 #endif
 
 #define __RWSEM_INITIALIZER(name,count) \
-{ ATOMIC_INIT(count), 0, 0, 0, 0, __WAIT_QUEUE_HEAD_INITIALIZER((name).wait), \
-	__WAIT_QUEUE_HEAD_INITIALIZER((name).write_bias_wait) \
+{ ATOMIC_INIT(RWSEM_UNLOCKED_VALUE), __WAIT_QUEUE_HEAD_INITIALIZER((name).wait), \
 	__SEM_DEBUG_INIT(name) __RWSEM_DEBUG_INIT }
 
 #define __DECLARE_RWSEM_GENERIC(name,count) \
@@ -260,11 +273,8 @@
 
 static inline void init_rwsem(struct rw_semaphore *sem)
 {
-	atomic_set(&sem->count, RW_LOCK_BIAS);
-	sem->read_bias_granted = 0;
-	sem->write_bias_granted = 0;
+	atomic_set(&sem->count, RWSEM_UNLOCKED_VALUE);
 	init_waitqueue_head(&sem->wait);
-	init_waitqueue_head(&sem->write_bias_wait);
 #if WAITQUEUE_DEBUG
 	sem->__magic = (long)&sem->__magic;
 	atomic_set(&sem->readers, 0);
@@ -283,7 +293,23 @@
 	if (sem->__magic != (long)&sem->__magic)
 		BUG();
 #endif
-	__build_read_lock(sem, "__down_read_failed");
+	rwsemdebug("Entering down_read(count=%08x)\n",atomic_read(&sem->count));
+	__asm__ __volatile__(
+		"# beginning down_read\n\t"
+LOCK_PREFIX	"  incl      (%1)\n\t" /* adds 0x00000001, returns the old value */
+		"  js        2f\n\t" /* jump if we weren't granted the lock */
+		"1:\n\t"
+		".section .text.lock,\"ax\"\n"
+		"2:\n\t"
+		"  call     __down_read_failed\n\t"
+		"  jmp      1b\n"
+		".previous"
+		"# ending down_read\n\t"
+		: "=a"(sem)
+		: "a0"(sem)
+		: "memory");
+
+	rwsemdebug("Leaving down_read(count=%08x)\n",atomic_read(&sem->count));
 #if WAITQUEUE_DEBUG
 	if (sem->write_bias_granted)
 		BUG();
@@ -295,11 +321,32 @@
 
 static inline void down_write(struct rw_semaphore *sem)
 {
+	int tmp;
+
 #if WAITQUEUE_DEBUG
 	if (sem->__magic != (long)&sem->__magic)
 		BUG();
 #endif
-	__build_write_lock(sem, "__down_write_failed");
+	rwsemdebug("Entering down_write(count=%08x)\n",atomic_read(&sem->count));
+
+	__asm__ __volatile__(
+		"# beginning down_write\n\t"
+LOCK_PREFIX	"  xadd      %0,(%1)\n\t" /* subtract 0x00010001, returns the old value */
+		"  testl     %0,%0\n\t" /* was the count 0 before? */
+		"  jnz       2f\n\t" /* jump if we weren't granted the lock */
+		"1:\n\t"
+		".section .text.lock,\"ax\"\n"
+		"2:\n\t"
+		"  call     __down_write_failed\n\t"
+		"  jmp      1b\n"
+		".previous\n"
+		"# ending down_write"
+		: "=r"(tmp), "=a"(sem)
+		: "r0"(RWSEM_ACTIVE_WRITE_BIAS), "a1"(sem)
+		: "memory");
+
+	rwsemdebug("Leaving down_write(count=%08x)\n",atomic_read(&sem->count));
+
 #if WAITQUEUE_DEBUG
 	if (atomic_read(&sem->writers))
 		BUG();
@@ -319,19 +366,29 @@
  */
 static inline void __up_read(struct rw_semaphore *sem)
 {
+	int tmp;
+
+	rwsemdebug("Entering up_read(count=%08x)\n",atomic_read(&sem->count));
+
 	__asm__ __volatile__(
-		"# up_read\n\t"
-		LOCK "incl %0\n\t"
-		"jz 2f\n"			/* only do the wake if result == 0 (ie, a writer) */
+		"# beginning __up_read\n\t"
+LOCK_PREFIX	"  xadd      %0,(%1)\n\t" /* subtracts 1, returns the old value */
+		"  js        2f\n\t" /* jump if the lock is being waited upon */
 		"1:\n\t"
 		".section .text.lock,\"ax\"\n"
-		"2:\tcall __rwsem_wake\n\t"
-		"jmp 1b\n"
-		".previous"
-		:"=m" (sem->count)
-		:"a" (sem)
-		:"memory"
-		);
+		"2:\n\t"
+		"  decl      %0\n\t" /* xadd gave us the old count */
+		"  testl     %4,%0\n\t" /* do nothing if still outstanding active readers */
+		"  jnz       1b\n\t"
+		"  call     __rwsem_wake\n\t"
+		"  jmp      1b\n"
+		".previous\n"
+		"# ending __up_read\n"
+		: "=r"(tmp), "=a"(sem)
+		: "r0"(-RWSEM_ACTIVE_READ_BIAS), "a1"(sem), "i"(RWSEM_ACTIVE_MASK)
+		: "memory");
+
+	rwsemdebug("Leaving up_read(count=%08x)\n",atomic_read(&sem->count));
 }
 
 /* releasing the writer is easy -- just release it and
@@ -339,19 +396,26 @@
  */
 static inline void __up_write(struct rw_semaphore *sem)
 {
+	int eax;
+
+	rwsemdebug("Entering __up_write(count=%08x)\n",atomic_read(&sem->count));
+
 	__asm__ __volatile__(
-		"# up_write\n\t"
-		LOCK "addl $" RW_LOCK_BIAS_STR ",%0\n"
-		"jc 2f\n"			/* only do the wake if the result was -'ve to 0/+'ve */
+		"# beginning __up_write\n\t"
+LOCK_PREFIX	"  addl      %2,(%0)\n\t" /* adds 0x00010001 */
+		"  js        2f\n\t" /* jump if the lock is being waited upon */
 		"1:\n\t"
 		".section .text.lock,\"ax\"\n"
-		"2:\tcall __rwsem_wake\n\t"
-		"jmp 1b\n"
-		".previous"
-		:"=m" (sem->count)
-		:"a" (sem)
-		:"memory"
-		);
+		"2:\n\t"
+		"  call     __rwsem_wake\n\t"
+		"  jmp      1b\n"
+		".previous\n"
+		"# ending __up_write\n"
+		: "=a"(eax)
+		: "a0"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS)
+		: "memory");
+
+	rwsemdebug("Leaving __up_write(count=%08x)\n",atomic_read(&sem->count));
 }
 
 static inline void up_read(struct rw_semaphore *sem)
diff -uNr linux-2.4.3/include/linux/sched.h linux/include/linux/sched.h
--- linux-2.4.3/include/linux/sched.h	Thu Apr  5 14:50:36 2001
+++ linux/include/linux/sched.h	Tue Apr 10 17:32:38 2001
@@ -548,6 +548,8 @@
 
 extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
 extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
+extern int FASTCALL(__wake_up_ctx(wait_queue_head_t *q, unsigned int mode, int count, int bit));
+extern int FASTCALL(__wake_up_ctx_sync(wait_queue_head_t *q, unsigned int mode, int count, int bit)); /* name as defined in kernel/sched.c */
 extern void FASTCALL(sleep_on(wait_queue_head_t *q));
 extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q,
 				      signed long timeout));
@@ -566,6 +568,8 @@
 #define wake_up_interruptible_all(x)	__wake_up((x),TASK_INTERRUPTIBLE, 0)
 #define wake_up_interruptible_sync(x)	__wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
 #define wake_up_interruptible_sync_nr(x) __wake_up_sync((x),TASK_INTERRUPTIBLE,  nr)
+#define wake_up_ctx(x,count,bit)	__wake_up_ctx((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,count,bit)
+#define wake_up_sync_ctx(x,count,bit)	__wake_up_ctx_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,count,bit) /* sync variant */
 asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
 
 extern int in_group_p(gid_t);
diff -uNr linux-2.4.3/include/linux/wait.h linux/include/linux/wait.h
--- linux-2.4.3/include/linux/wait.h	Thu Apr  5 14:50:36 2001
+++ linux/include/linux/wait.h	Tue Apr 10 10:18:30 2001
@@ -26,6 +26,14 @@
 struct __wait_queue {
 	unsigned int flags;
 #define WQ_FLAG_EXCLUSIVE	0x01
+#define WQ_FLAG_CONTEXT_0	8	/* context specific flag bit numbers */
+#define WQ_FLAG_CONTEXT_1	9
+#define WQ_FLAG_CONTEXT_2	10
+#define WQ_FLAG_CONTEXT_3	11
+#define WQ_FLAG_CONTEXT_4	12
+#define WQ_FLAG_CONTEXT_5	13
+#define WQ_FLAG_CONTEXT_6	14
+#define WQ_FLAG_CONTEXT_7	15
 	struct task_struct * task;
 	struct list_head task_list;
 #if WAITQUEUE_DEBUG
diff -uNr linux-2.4.3/kernel/fork.c linux/kernel/fork.c
--- linux-2.4.3/kernel/fork.c	Thu Apr  5 14:44:17 2001
+++ linux/kernel/fork.c	Tue Apr 10 09:19:47 2001
@@ -39,7 +39,7 @@
 	unsigned long flags;
 
 	wq_write_lock_irqsave(&q->lock, flags);
-	wait->flags = 0;
+	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
 	__add_wait_queue(q, wait);
 	wq_write_unlock_irqrestore(&q->lock, flags);
 }
@@ -49,7 +49,7 @@
 	unsigned long flags;
 
 	wq_write_lock_irqsave(&q->lock, flags);
-	wait->flags = WQ_FLAG_EXCLUSIVE;
+	wait->flags |= WQ_FLAG_EXCLUSIVE;
 	__add_wait_queue_tail(q, wait);
 	wq_write_unlock_irqrestore(&q->lock, flags);
 }
diff -uNr linux-2.4.3/kernel/sched.c linux/kernel/sched.c
--- linux-2.4.3/kernel/sched.c	Thu Apr  5 14:44:17 2001
+++ linux/kernel/sched.c	Tue Apr 10 15:25:44 2001
@@ -739,7 +739,7 @@
 		state = p->state;
 		if (state & mode) {
 			WQ_NOTE_WAKER(curr);
-			if (try_to_wake_up(p, sync) && curr->flags && !--nr_exclusive)
+			if (try_to_wake_up(p, sync) && (curr->flags&WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
 				break;
 		}
 	}
@@ -763,6 +763,80 @@
 		__wake_up_common(q, mode, nr, 1);
 		wq_read_unlock_irqrestore(&q->lock, flags);
 	}
+}
+
+/*
+ * wake up processes in the wait queue depending on the state of a context bit in the flags
+ * - wakes up a process if the specified bit is set in the flags member
+ * - the context bit is cleared if the process is woken up
+ * - if the bit number is negative, then the loop stops at the first unset context bit encountered
+ * - returns the number of processes woken
+ */
+static inline int __wake_up_ctx_common (wait_queue_head_t *q, unsigned int mode,
+					int count, int bit, const int sync)
+{
+	struct list_head *tmp, *head;
+	struct task_struct *p;
+	int stop, woken;
+
+	woken = 0;
+	stop = bit<0;
+	if (bit<0) bit = -bit;
+
+	CHECK_MAGIC_WQHEAD(q);
+	head = &q->task_list;
+	WQ_CHECK_LIST_HEAD(head);
+	tmp = head->next;
+	while (tmp != head) {
+		unsigned int state;
+                wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
+
+		tmp = tmp->next;
+		CHECK_MAGIC(curr->__magic);
+		p = curr->task;
+		state = p->state;
+		if (state & mode) {
+			if (!test_and_clear_bit(bit,&curr->flags)) {
+				if (stop)
+					break;
+				continue;
+			}
+
+			WQ_NOTE_WAKER(curr);
+			if (!try_to_wake_up(p, sync) || !(curr->flags&WQ_FLAG_EXCLUSIVE))
+				continue;
+
+			woken++;
+			if (woken>=count)
+				break;
+		}
+	}
+
+	return woken;
+}
+
+int __wake_up_ctx(wait_queue_head_t *q, unsigned int mode, int count, int bit)
+{
+	int woken = 0;
+	if (q && count) {
+		unsigned long flags;
+		wq_read_lock_irqsave(&q->lock, flags);
+		woken = __wake_up_ctx_common(q, mode, count, bit, 0);
+		wq_read_unlock_irqrestore(&q->lock, flags);
+	}
+	return woken;
+}
+
+int __wake_up_ctx_sync(wait_queue_head_t *q, unsigned int mode, int count, int bit)
+{
+	int woken = 0;
+	if (q && count) {
+		unsigned long flags;
+		wq_read_lock_irqsave(&q->lock, flags);
+		woken = __wake_up_ctx_common(q, mode, count, bit, 1);
+		wq_read_unlock_irqrestore(&q->lock, flags);
+	}
+	return woken;
 }
 
 #define	SLEEP_ON_VAR				\

[-- Attachment #3: rw-semaphore testing module --]
[-- Type: text/plain, Size: 1900 bytes --]

#define __NO_VERSION__
#include <linux/config.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/personality.h>
#include <linux/smp_lock.h>
#include <linux/delay.h>

#define kdebug(FMT,...)
//#define kdebug printk

MODULE_AUTHOR("David Howells");
MODULE_DESCRIPTION("R/W semaphore test demo");


/* /proc/rwsem entry; assigned by rwsem_init_module() */
struct proc_dir_entry *rwsem_proc;

/* the semaphore under test; taken by the /proc/rwsem read and write handlers */
struct rw_semaphore rwsem_sem;

/*
 * /proc/rwsem read handler
 * - takes the test semaphore for reading, holds it across mdelay(2), then
 *   releases it again
 * - none of the arguments are examined
 * - always returns -ENOENT
 */
static int rwsem_read_proc(char *page, char **start, off_t off,
			   int count, int *eof, void *data)
{
	struct rw_semaphore *sem = &rwsem_sem;

	/* acquire */
	kdebug("[%d] r-downing: %08x\n",current->pid,sem->count.counter);
	down_read(sem);
	kdebug("[%d] r-downed: %08x\n",current->pid,sem->count.counter);

	/* hold the read lock for a while so that other lockers pile up */
	mdelay(2);

	/* release */
	kdebug("[%d] r-upping: %08x\n",current->pid,sem->count.counter);
	up_read(sem);
	kdebug("[%d] r-upped: %08x\n",current->pid,sem->count.counter);

	return -ENOENT;
}

static int rwsem_write_proc(struct file *file, const char *buffer, unsigned long count, void *data)
{
	kdebug("[%d] w-downing: %08x\n",current->pid,rwsem_sem.count.counter);
	down_write(&rwsem_sem);
	kdebug("[%d] w-downed: %08x\n",current->pid,rwsem_sem.count.counter);
	mdelay(2);
	kdebug("[%d] w-upping: %08x\n",current->pid,rwsem_sem.count.counter);
	up_write(&rwsem_sem);
	kdebug("[%d] w-upped: %08x\n",current->pid,rwsem_sem.count.counter);

	return -ENOENT;
}

/*
 * module load: initialise the test semaphore and publish /proc/rwsem
 * - the entry is created with mode S_IRUGO|S_IWUGO and wired to the read and
 *   write handlers
 * - returns 0 on success, or -ENOMEM if the proc entry could not be created
 */
static int __init rwsem_init_module(void)
{
	kdebug(KERN_INFO "rwsem loading...\n");

	init_rwsem(&rwsem_sem);

	rwsem_proc = create_proc_entry("rwsem",S_IRUGO|S_IWUGO,NULL);
	if (!rwsem_proc) {
		/* NOTE(review): a NULL return is assumed to mean the entry could
		 * not be allocated, not that it already exists - confirm against
		 * fs/proc/generic.c
		 */
		return -ENOMEM;
	}

	rwsem_proc->read_proc = &rwsem_read_proc;
	rwsem_proc->write_proc = &rwsem_write_proc;
	return 0;
}

/*
 * module unload: withdraw the /proc/rwsem entry
 */
static void __exit rwsem_cleanup_module(void)
{
	kdebug(KERN_INFO "rwsem unloading\n");
	remove_proc_entry("rwsem",NULL);
}

module_init(rwsem_init_module);
module_exit(rwsem_cleanup_module);

[-- Attachment #4: rw-semaphore testing module driver program --]
[-- Type: text/plain, Size: 629 bytes --]

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

/*
 * Driver for the /proc/rwsem test file.
 *
 * usage: driver [count]
 *   count > 0  fork count children that each read() one byte from /proc/rwsem
 *   count < 0  fork |count| children that each write() one byte to /proc/rwsem
 *   count defaults to 50
 *
 * Returns 0 once every child has been reaped; returns 1 if the argument is
 * not a valid integer, or if open(), fork() or wait() fails.
 */
int main(int argc, char *argv[])
{
	char buf[1];
	int max = 50, started = 0, rc = 0, loop, fd, tmp, rw;

	if (argc>1) {
		char *end;
		long val;

		/* strtol() rather than atoi() so that junk and overflow are
		 * rejected; INT_MIN is excluded too, since abs(INT_MIN) is
		 * undefined
		 */
		errno = 0;
		val = strtol(argv[1],&end,10);
		if (end==argv[1] || *end!='\0' || errno==ERANGE ||
		    val>INT_MAX || val<-INT_MAX) {
			fprintf(stderr,"usage: %s [count]\n",argv[0]);
			return 1;
		}
		max = (int)val;
	}

	rw = max<0;
	max = abs(max);

	fd = open("/proc/rwsem",O_RDWR);
	if (fd<0) {
		perror("open");
		return 1;
	}

	for (loop=max; loop>0 && rc==0; loop--) {
		switch (fork()) {
		case -1:
			/* stop forking, but still reap what was started below */
			perror("fork");
			rc = 1;
			break;
		case 0:
			/* only the locking side effect of the call matters, so its
			 * result is deliberately ignored
			 */
			if (rw)
				(void)write(fd," ",1);
			else
				(void)read(fd,buf,1);
			/* _exit() so the child does not run the parent's exit-time
			 * processing; the status is never inspected by the parent
			 */
			_exit(1);
		default:
			started++;
			break;
		}
	}

	while (started>0) {
		if (wait(&tmp)<0) {
			if (errno==EINTR)
				continue;
			perror("wait");
			close(fd);
			return 1;
		}
		started--;
	}

	close(fd);
	return rc;
}

  reply	other threads:[~2001-04-10 18:03 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <3AD0FD0F.9B0C47FD@uow.edu.au>
2001-04-09  3:08 ` rw_semaphores Linus Torvalds
2001-04-09  4:18   ` rw_semaphores Linus Torvalds
2001-04-09 13:55     ` rw_semaphores Ben LaHaise
2001-04-10  2:41   ` rw_semaphores Tachino Nobuhiro
2001-04-10  5:43     ` rw_semaphores Linus Torvalds
2001-04-10  6:33       ` rw_semaphores Tachino Nobuhiro
2001-04-10  7:47       ` rw_semaphores David Howells
2001-04-10 18:02         ` David Howells [this message]
2001-04-10 19:42           ` [PATCH] i386 rw_semaphores fix Linus Torvalds
2001-04-10 19:56             ` x86 cpu configuration (was: Re: [PATCH] i386 rw_semaphores fix) Jeff Garzik
2001-04-10 21:58               ` Alan Cox
2001-04-10 20:05             ` [PATCH] i386 rw_semaphores fix Andi Kleen
2001-04-10 20:16               ` Linus Torvalds
2001-04-10 22:00               ` Alan Cox
2001-04-11  0:00                 ` Andi Kleen
2001-04-11  0:13                   ` David Weinehall
2001-04-11  0:20                     ` Andi Kleen
2001-04-11  0:56                       ` David Weinehall
2001-04-11  1:04                         ` Andi Kleen
2001-04-11 12:32                       ` Alan Cox
2001-04-11  0:55                     ` Linus Torvalds
2001-04-11  1:07                       ` Andi Kleen
2001-04-11  1:12                         ` Linus Torvalds
2001-04-11  1:23                           ` Andi Kleen
2001-04-11 12:36                             ` Alan Cox
2001-04-11 18:05                       ` H. Peter Anvin
2001-04-11 12:28                   ` Alan Cox
2001-04-11 18:06                     ` H. Peter Anvin
2001-04-11 22:06                       ` Alan Cox
2001-04-11 22:42                         ` H. Peter Anvin
2001-04-11 22:55                           ` Alan Cox
2001-04-10 21:57             ` Alan Cox
2001-04-11  0:40               ` Tim Wright
2001-04-11  7:38             ` David Howells
2001-04-11 12:24               ` Maciej W. Rozycki
2001-04-11 12:57             ` [PATCH] 2nd try: " David Howells
2001-04-11 16:37               ` [PATCH] 3rd " David Howells
2001-04-11 21:41                 ` [PATCH] 4th " David Howells
2001-04-12 18:16                   ` Andrew Morton
2001-04-11 23:00                 ` [PATCH] 3rd " Anton Blanchard
2001-04-12 15:06                   ` [PATCH] i386 rw_semaphores, general abstraction patch David Howells
2001-04-11 16:56           ` [PATCH] i386 rw_semaphores fix Andrew Morton
2001-04-11 17:36             ` David Howells
2001-04-11 18:41               ` Linus Torvalds
2001-04-11 21:27             ` David Howells
2001-04-16 14:39         ` rw_semaphores yodaiken
2001-04-16 14:56           ` rw_semaphores Alan Cox
2001-04-16 17:05           ` rw_semaphores Linus Torvalds
2001-04-16 17:34             ` rw_semaphores yodaiken
2001-04-16 17:26           ` rw_semaphores Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11851.986925762@warthog.cambridge.redhat.com \
    --to=dhowells@cambridge.redhat.com \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=andrewm@uow.edu.au \
    --cc=bcrl@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox