* [Linux-ia64] ia64 rwsem using atomic primitive
@ 2003-02-19 18:29 Chen, Kenneth W
0 siblings, 0 replies; 2+ messages in thread
From: Chen, Kenneth W @ 2003-02-19 18:29 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 559 bytes --]
I have converted the rw semaphore on ia64 from the current generic spin_lock implementation to one that uses architecture-specific atomic operations. The new scheme speeds up all semaphore operations in the fast path by using atomic instructions, and falls back to a heavier out-of-line function when there is read/write contention. I've also taken some raw measurements of the improvement: the most significant gain comes from parallel reader lock acquire/release, which is around 6.6X faster with the new version. Here is a patch against 2.4.20.
<<rwsem.2.4.20.patch>>
- Ken
[-- Attachment #2: rwsem.2.4.20.patch --]
[-- Type: application/octet-stream, Size: 5264 bytes --]
diff -Nur linux-2.4.20/arch/ia64/config.in linux-2.4.20.rwsem/arch/ia64/config.in
--- linux-2.4.20/arch/ia64/config.in Wed Feb 19 10:18:31 2003
+++ linux-2.4.20.rwsem/arch/ia64/config.in Wed Feb 19 10:18:50 2003
@@ -23,8 +23,8 @@
define_bool CONFIG_EISA n
define_bool CONFIG_MCA n
define_bool CONFIG_SBUS n
-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
choice 'IA-64 processor type' \
"Itanium CONFIG_ITANIUM \
diff -Nur linux-2.4.20/include/asm-ia64/rwsem.h linux-2.4.20.rwsem/include/asm-ia64/rwsem.h
--- linux-2.4.20/include/asm-ia64/rwsem.h Wed Dec 31 16:00:00 1969
+++ linux-2.4.20.rwsem/include/asm-ia64/rwsem.h Wed Feb 19 10:20:03 2003
@@ -0,0 +1,171 @@
+/*
+ * asm-ia64/rwsem.h: R/W semaphores for ia64
+ *
+ * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
+ * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
+ *
+ * Based on asm-i386/rwsem.h and other architecture implementation.
+ *
+ * The MSW of the count is the negated number of active writers and
+ * waiting lockers, and the LSW is the total number of active locks.
+ *
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer adds RWSEM_ACTIVE_WRITE_BIAS, the count becomes 0xffff0001
+ * for the case of an uncontended lock. Readers increment by 1 and see a
+ * positive value when uncontended, negative if there are writers (and
+ * maybe readers) waiting (in which case it goes to sleep).
+ */
+
+#ifndef _IA64_RWSEM_H
+#define _IA64_RWSEM_H
+
+#ifdef __KERNEL__
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+/*
+ * the semaphore definition
+ *
+ * count packs the lock state into one word so the fast paths can update
+ * it with a single atomic operation: the low 16 bits (RWSEM_ACTIVE_MASK)
+ * count active lockers, and the value goes negative (RWSEM_WAITING_BIAS)
+ * while a writer is active or lockers are waiting.
+ * Field order matters: __RWSEM_INITIALIZER fills the struct positionally.
+ */
+struct rw_semaphore {
+ signed int count; /* RWSEM_UNLOCKED_VALUE (0) when free */
+ spinlock_t wait_lock; /* presumably guards wait_list in the slow paths -- confirm in lib/rwsem.c */
+ struct list_head wait_list; /* presumably the queue of sleeping lockers -- confirm in lib/rwsem.c */
+#if RWSEM_DEBUG
+ int debug; /* initialised to 0; its use is not visible in this header */
+#endif
+};
+
+#define RWSEM_UNLOCKED_VALUE 0x00000000 /* no active and no waiting lockers */
+#define RWSEM_ACTIVE_BIAS 0x00000001 /* one active locker, counted in the low word */
+#define RWSEM_ACTIVE_MASK 0x0000ffff /* selects the active-locker count */
+#define RWSEM_WAITING_BIAS (-0x00010000) /* one writer/waiter, counted negatively in the high word */
+#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS /* per-reader increment (the read fast path adds a literal 1) */
+#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) /* what a writer adds: 0xffff0001 */
+
+/*
+ * initialization
+ *
+ * __RWSEM_INITIALIZER(name) is a positional static initializer for an
+ * unlocked semaphore: count, wait_lock, wait_list, in struct order.
+ * __RWSEM_DEBUG_INIT appends ", 0" for the debug field when RWSEM_DEBUG
+ * is set and expands to nothing otherwise.  DECLARE_RWSEM(name) defines
+ * a struct rw_semaphore called name using that initializer.
+ */
+#if RWSEM_DEBUG
+#define __RWSEM_DEBUG_INIT , 0
+#else
+#define __RWSEM_DEBUG_INIT /* */
+#endif
+
+#define __RWSEM_INITIALIZER(name) \
+ { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
+ LIST_HEAD_INIT((name).wait_list) \
+ __RWSEM_DEBUG_INIT }
+
+#define DECLARE_RWSEM(name) \
+ struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); /* contended path of __down_read; defined outside this header */
+extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); /* contended path of __down_write; defined outside this header */
+extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); /* called from __up_read/__up_write; return value unused in this header */
+
+/*
+ * Run-time counterpart of __RWSEM_INITIALIZER: put *sem into the
+ * unlocked state with an initialised spinlock and an empty wait list.
+ */
+static inline void init_rwsem(struct rw_semaphore *sem)
+{
+#if RWSEM_DEBUG
+	sem->debug = 0;
+#endif
+	INIT_LIST_HEAD(&sem->wait_list);
+	spin_lock_init(&sem->wait_lock);
+	sem->count = RWSEM_UNLOCKED_VALUE;
+}
+
+/*
+ * lock for reading
+ *
+ * Fast path: atomically add 1 to sem->count with fetchadd4.acq, which
+ * leaves the value the count held *before* the add in result.  A negative
+ * old value means the writer/waiter part of the count was set, so the
+ * contended case is handed to rwsem_down_read_failed(); its return value
+ * is not used here.
+ */
+static inline void __down_read(struct rw_semaphore *sem)
+{
+ int result;
+ __asm__ __volatile__ ("fetchadd4.acq %0=[%1],1" :
+ "=r"(result) : "r"(&sem->count) : "memory");
+ if (result < 0) /* count was already negative: contended */
+ rwsem_down_read_failed(sem);
+}
+
+/*
+ * lock for writing
+ *
+ * Adds RWSEM_ACTIVE_WRITE_BIAS to sem->count with a cmpxchg_acq retry
+ * loop: re-read the count and try again until the compare-and-exchange
+ * reports the same value that was read.  If the count was anything other
+ * than 0 (unlocked) beforehand, the lock is contended and
+ * rwsem_down_write_failed() takes over; its return value is not used.
+ */
+static inline void __down_write(struct rw_semaphore *sem)
+{
+ int old, new;
+
+ do {
+ old = sem->count;
+ new = old + RWSEM_ACTIVE_WRITE_BIAS;
+ } while (cmpxchg_acq(&sem->count, old, new) != old); /* retry if count changed in between */
+
+ if (old != 0) /* someone else was active or waiting */
+ rwsem_down_write_failed(sem);
+}
+
+/*
+ * unlock after reading
+ *
+ * Atomically subtracts 1 from sem->count with fetchadd4.rel; result
+ * receives the count as it was before the subtraction.  rwsem_wake() is
+ * called only when that old count was negative and, after applying the
+ * decrement locally (--result), the RWSEM_ACTIVE_MASK bits are all zero,
+ * i.e. no active lockers remain while the count is still negative.
+ */
+static inline void __up_read(struct rw_semaphore *sem)
+{
+ int result;
+ __asm__ __volatile__ ("fetchadd4.rel %0=[%1],-1" :
+ "=r"(result) : "r"(&sem->count) : "memory");
+ if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) /* --result turns the old value into the new one */
+ rwsem_wake(sem);
+}
+
+/*
+ * unlock after writing
+ *
+ * Subtracts RWSEM_ACTIVE_WRITE_BIAS from sem->count with a cmpxchg_rel
+ * retry loop.  new is the value this caller stored; if it is still
+ * negative with no active lockers left in the low word
+ * (RWSEM_ACTIVE_MASK), rwsem_wake() is called.
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+ int old, new;
+
+ do {
+ old = sem->count;
+ new = old - RWSEM_ACTIVE_WRITE_BIAS;
+ } while (cmpxchg_rel(&sem->count, old, new) != old); /* retry if count changed in between */
+
+ if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) /* count still negative, nobody active */
+ rwsem_wake(sem);
+}
+
+/*
+ * Try to take the semaphore for reading without sleeping.
+ *
+ * Returns 1 once the count has been bumped by one from a non-negative
+ * value, 0 as soon as the count is observed to be negative.  A failed
+ * compare-and-exchange just re-reads the count and tries again.
+ */
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+	for (;;) {
+		int seen = sem->count;
+
+		if (seen < 0)
+			return 0;
+		if (cmpxchg_acq(&sem->count, seen, seen + 1) == seen)
+			return 1;
+	}
+}
+
+/*
+ * Try to take the semaphore for writing without sleeping.
+ *
+ * One compare-and-exchange swaps an unlocked count for
+ * RWSEM_ACTIVE_WRITE_BIAS; returns 1 if the previous count was
+ * RWSEM_UNLOCKED_VALUE (the swap took place), 0 otherwise.
+ */
+static inline int __down_write_trylock(struct rw_semaphore *sem)
+{
+	int prev;
+
+	prev = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
+			   RWSEM_ACTIVE_WRITE_BIAS);
+	if (prev != RWSEM_UNLOCKED_VALUE)
+		return 0;
+	return 1;
+}
+
+/*
+ * Add delta to the semaphore count via atomic_add(), treating the
+ * count field as an atomic_t; nothing is returned.
+ */
+static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+{
+	atomic_t *counter = (atomic_t *)(&sem->count);
+
+	atomic_add(delta, counter);
+}
+
+/*
+ * Add delta to the semaphore count, treating the count field as an
+ * atomic_t, and hand back whatever atomic_add_return() yields.
+ */
+static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
+{
+	atomic_t *counter = (atomic_t *)(&sem->count);
+
+	return atomic_add_return(delta, counter);
+}
+
+#endif /* __KERNEL__ */
+#endif /* _IA64_RWSEM_H */
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [Linux-ia64] ia64 rwsem using atomic primitive
@ 2003-03-06 23:27 Bjorn Helgaas
0 siblings, 0 replies; 2+ messages in thread
From: Bjorn Helgaas @ 2003-03-06 23:27 UTC (permalink / raw)
To: linux-ia64
> I have converted the rw semaphore on ia64 from the current generic spin_lock implementation to one that uses architecture-specific atomic operations. The new scheme speeds up all semaphore operations in the fast path by using atomic instructions, and falls back to a heavier out-of-line function when there is read/write contention. I've also taken some raw measurements of the improvement: the most significant gain comes from parallel reader lock acquire/release, which is around 6.6X faster with the new version. Here is a patch against 2.4.20.
I applied this patch to 2.4. I also picked up David's tweaks to improve
the optimization of ia64_fetch_and_add, including the move of that
function and related ones to intrinsics.h.
Bjorn
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2003-03-06 23:27 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-02-19 18:29 [Linux-ia64] ia64 rwsem using atomic primitive Chen, Kenneth W
-- strict thread matches above, loose matches on Subject: below --
2003-03-06 23:27 Bjorn Helgaas
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox