public inbox for linux-ia64@vger.kernel.org
* [Linux-ia64] ia64 rwsem using atomic primitive
@ 2003-02-19 18:29 Chen, Kenneth W
  0 siblings, 0 replies; 2+ messages in thread
From: Chen, Kenneth W @ 2003-02-19 18:29 UTC (permalink / raw)
  To: linux-ia64

[-- Attachment #1: Type: text/plain, Size: 559 bytes --]

I have converted the rw semaphore from the current generic spinlock implementation to architecture-specific atomic operations on ia64.  This new scheme speeds up all of the semaphore operations in the fast path with atomic instructions and falls back to a heavier-weight function when there is read/write contention.  I've also taken some raw measurements of how much it improves.  The most significant gain comes from parallel reader lock acquire/release, which sees around a 6.6x speedup with the new version.  Here is a patch against 2.4.20.
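
For anyone not familiar with the xchg-add style rwsems on other architectures, the idea is that the uncontended case is a single atomic add on the count, and only a "bad" result sends us into the out-of-line contended routine.  Below is a minimal userspace sketch of the reader side, using a GCC builtin instead of the fetchadd4.acq the patch actually uses; rwsem_down_read_failed_slowpath() is a hypothetical stand-in for the kernel's slow path.

	/*
	 * Sketch only -- illustrates the fast-path/slow-path split the patch
	 * implements with fetchadd4.acq on ia64.  Not the actual kernel code.
	 */
	struct sketch_rwsem {
		int count;			/* same encoding as in the patch */
	};

	void rwsem_down_read_failed_slowpath(struct sketch_rwsem *sem);	/* hypothetical */

	static inline void sketch_down_read(struct sketch_rwsem *sem)
	{
		/* __sync_fetch_and_add() returns the old value, like fetchadd */
		int old = __sync_fetch_and_add(&sem->count, 1);

		if (old < 0)					/* writer active or waiting */
			rwsem_down_read_failed_slowpath(sem);	/* contended: go to sleep */
	}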

 <<rwsem.2.4.20.patch>> 
- Ken


[-- Attachment #2: rwsem.2.4.20.patch --]
[-- Type: application/octet-stream, Size: 5264 bytes --]

diff -Nur linux-2.4.20/arch/ia64/config.in linux-2.4.20.rwsem/arch/ia64/config.in
--- linux-2.4.20/arch/ia64/config.in	Wed Feb 19 10:18:31 2003
+++ linux-2.4.20.rwsem/arch/ia64/config.in	Wed Feb 19 10:18:50 2003
@@ -23,8 +23,8 @@
 define_bool CONFIG_EISA n
 define_bool CONFIG_MCA n
 define_bool CONFIG_SBUS n
-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
 
 choice 'IA-64 processor type' \
 	"Itanium		CONFIG_ITANIUM \
diff -Nur linux-2.4.20/include/asm-ia64/rwsem.h linux-2.4.20.rwsem/include/asm-ia64/rwsem.h
--- linux-2.4.20/include/asm-ia64/rwsem.h	Wed Dec 31 16:00:00 1969
+++ linux-2.4.20.rwsem/include/asm-ia64/rwsem.h	Wed Feb 19 10:20:03 2003
@@ -0,0 +1,171 @@
+/*
+ * asm-ia64/rwsem.h: R/W semaphores for ia64
+ *
+ * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
+ * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
+ *
+ * Based on asm-i386/rwsem.h and other architecture implementations.
+ *
+ * The MSW of the count is the negated number of active writers and
+ * waiting lockers, and the LSW is the total number of active locks.
+ *
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer adds RWSEM_ACTIVE_WRITE_BIAS (i.e. subtracts 0x0000ffff),
+ * it'll see 0xffff0001 for the case of an uncontended lock. Readers
+ * increment by 1 and see a positive value when uncontended, negative if
+ * there are writers (and maybe readers) waiting; in that case the reader sleeps.
+ */
+
+#ifndef _IA64_RWSEM_H
+#define _IA64_RWSEM_H
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/*
+ * the semaphore definition
+ */
+struct rw_semaphore {
+	signed int		count;
+	spinlock_t		wait_lock;
+	struct list_head	wait_list;
+#if RWSEM_DEBUG
+	int			debug;
+#endif
+};
+
+#define RWSEM_UNLOCKED_VALUE		0x00000000
+#define RWSEM_ACTIVE_BIAS		0x00000001
+#define RWSEM_ACTIVE_MASK		0x0000ffff
+#define RWSEM_WAITING_BIAS		(-0x00010000)
+#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
+/*
+ * initialization
+ */
+#if RWSEM_DEBUG
+#define __RWSEM_DEBUG_INIT      , 0
+#else
+#define __RWSEM_DEBUG_INIT	/* */
+#endif
+
+#define __RWSEM_INITIALIZER(name) \
+	{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
+	  LIST_HEAD_INIT((name).wait_list) \
+	  __RWSEM_DEBUG_INIT }
+
+#define DECLARE_RWSEM(name) \
+	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
+
+static inline void init_rwsem(struct rw_semaphore *sem)
+{
+	sem->count = RWSEM_UNLOCKED_VALUE;
+	spin_lock_init(&sem->wait_lock);
+	INIT_LIST_HEAD(&sem->wait_list);
+#if RWSEM_DEBUG
+	sem->debug = 0;
+#endif
+}
+
+/*
+ * lock for reading
+ */
+static inline void __down_read(struct rw_semaphore *sem)
+{
+	int result;
+	__asm__ __volatile__ ("fetchadd4.acq %0=[%1],1" :
+			      "=r"(result) : "r"(&sem->count) : "memory");
+	if (result < 0)
+		rwsem_down_read_failed(sem);
+}
+
+/*
+ * lock for writing
+ */
+static inline void __down_write(struct rw_semaphore *sem)
+{
+	int old, new;
+
+	do {
+		old = sem->count;
+		new = old + RWSEM_ACTIVE_WRITE_BIAS;
+	} while (cmpxchg_acq(&sem->count, old, new) != old);
+
+	if (old != 0)
+		rwsem_down_write_failed(sem);
+}
+
+/*
+ * unlock after reading
+ */
+static inline void __up_read(struct rw_semaphore *sem)
+{
+	int result;
+	__asm__ __volatile__ ("fetchadd4.rel %0=[%1],-1" :
+			      "=r"(result) : "r"(&sem->count) : "memory");
+	if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
+		rwsem_wake(sem);
+}
+
+/*
+ * unlock after writing
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+	int old, new;
+
+	do {
+		old = sem->count;
+		new = old - RWSEM_ACTIVE_WRITE_BIAS;
+	} while (cmpxchg_rel(&sem->count, old, new) != old);
+
+	if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
+		rwsem_wake(sem);
+}
+
+/*
+ * trylock for reading -- returns 1 if successful, 0 if contention
+ */
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+	int tmp;
+	while ((tmp = sem->count) >= 0) {
+		if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * trylock for writing -- returns 1 if successful, 0 if contention
+ */
+static inline int __down_write_trylock(struct rw_semaphore *sem)
+{
+	int tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
+			  RWSEM_ACTIVE_WRITE_BIAS);
+	return tmp == RWSEM_UNLOCKED_VALUE;
+}
+
+/*
+ * implement atomic add functionality
+ */
+static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+{
+	atomic_add(delta, (atomic_t *)(&sem->count));
+}
+
+static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
+{
+	return atomic_add_return(delta, (atomic_t *)(&sem->count));
+}
+
+#endif /* __KERNEL__ */
+#endif /* _IA64_RWSEM_H */
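
To make the count encoding described in the patch's header comment concrete, here is a small worked example (a standalone sketch; only the constants are copied from the patch):

	#include <stdio.h>

	#define RWSEM_ACTIVE_BIAS		0x00000001
	#define RWSEM_WAITING_BIAS		(-0x00010000)
	#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

	int main(void)
	{
		int count = 0;				/* unlocked */

		count += RWSEM_ACTIVE_BIAS;		/* first reader */
		printf("one reader:       0x%08x\n", (unsigned int)count);	/* 0x00000001 (positive) */

		count = RWSEM_ACTIVE_WRITE_BIAS;	/* writer takes an unlocked sem */
		printf("one writer:       0x%08x\n", (unsigned int)count);	/* 0xffff0001 (negative) */

		count += RWSEM_ACTIVE_BIAS;		/* reader arrives while the writer holds it */
		printf("reader contended: 0x%08x\n", (unsigned int)count);	/* 0xffff0002 (still negative) */

		return 0;
	}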


* Re: [Linux-ia64] ia64 rwsem using atomic primitive
@ 2003-03-06 23:27 Bjorn Helgaas
  0 siblings, 0 replies; 2+ messages in thread
From: Bjorn Helgaas @ 2003-03-06 23:27 UTC (permalink / raw)
  To: linux-ia64

> I have converted the rw semaphore from the current generic spinlock implementation to architecture-specific atomic operations on ia64.  This new scheme speeds up all of the semaphore operations in the fast path with atomic instructions and falls back to a heavier-weight function when there is read/write contention.  I've also taken some raw measurements of how much it improves.  The most significant gain comes from parallel reader lock acquire/release, which sees around a 6.6x speedup with the new version.  Here is a patch against 2.4.20.

I applied this patch to 2.4.  I also picked up David's tweaks to improve
the optimization of ia64_fetch_and_add, including the move of that
function and related ones to intrinsics.h.
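
For reference, ia64_fetch_and_add() is essentially a thin wrapper around the fetchadd instruction, which hands back the value the word held before the add, so returning the new value just means adding the increment back in.  A rough sketch of the shape (not the actual code from intrinsics.h; the ia64_fetchadd4_rel() form is assumed here, and the real macros also restrict the increment to the small set of immediates fetchadd accepts):

	/* Sketch only -- not the kernel's ia64_fetch_and_add(). */
	static inline int sketch_fetch_and_add(volatile int *p, int inc)
	{
		int old = ia64_fetchadd4_rel((unsigned int *)p, inc);	/* assumed intrinsic */

		return old + inc;	/* fetchadd returned the old value */
	}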

Bjorn



