* Optimize cpumask functions for SMPs with < BITS_PER_LONG processors
From: Ralf Baechle @ 2007-09-25 15:52 UTC
  To: linux-arch

When debugging a kernel using a logic analyzer (!) a colleague recently
noticed that the <linux/cpumask.h> functions carry a relatively high
overhead because they are based on the generic bitops, which support
arbitrary-size bitfields.  Here's the chainsaw edition of a patch to
optimize this for CONFIG_NR_CPUS <= BITS_PER_LONG.  Comments?
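
As an illustrative sketch (not part of the patch; both function names
are made up), the saving comes from dropping the word-array indexing
that the generic bitops must perform:

	/* generic style: index into an array of longs, then shift */
	static inline int multiword_test_bit(int nr,
					     const volatile unsigned long *addr)
	{
		return 1UL & (addr[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG));
	}

	/* single-long style: valid only for nr < BITS_PER_LONG */
	static inline int single_long_test_bit(int nr,
					       const volatile unsigned long *addr)
	{
		return 1UL & (*addr >> nr);
	}

With NR_CPUS <= BITS_PER_LONG the compiler can drop the index
arithmetic entirely.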

  Ralf

From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 31 Jul 2007 13:03:16 +0100

[PATCH] Optimize bitop code for single long bitfields such as cpumask_t on small SMP.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 9e71201..87e207e 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -236,6 +236,8 @@ test_bit(int nr, const volatile void * addr)
 	return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL;
 }
 
+#include <asm-generic/bitops/atomic-long.h>
+
 /*
  * ffz = Find First Zero in word. Undefined if no zero exists,
  * so code should check against ~0UL first..
diff --git a/include/asm-arm/bitops.h b/include/asm-arm/bitops.h
index b41831b..98dcd15 100644
--- a/include/asm-arm/bitops.h
+++ b/include/asm-arm/bitops.h
@@ -117,7 +117,9 @@ ____atomic_test_and_change_bit(unsigned int bit, volatile unsigned long *p)
 	return res & mask;
 }
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 /*
  *  A note about Endian-ness.
diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h
index 5299f8c..784d60b 100644
--- a/include/asm-avr32/bitops.h
+++ b/include/asm-avr32/bitops.h
@@ -230,7 +230,9 @@ static inline int test_and_change_bit(int nr, volatile void * addr)
 	return (old & mask) != 0;
 }
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 /* Find First bit Set */
 static inline unsigned long __ffs(unsigned long word)
diff --git a/include/asm-blackfin/bitops.h b/include/asm-blackfin/bitops.h
index 27c2d0e..2fec38f 100644
--- a/include/asm-blackfin/bitops.h
+++ b/include/asm-blackfin/bitops.h
@@ -11,6 +11,7 @@
 
 #ifdef __KERNEL__
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/sched.h>
diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h
index a569065..2832ebd 100644
--- a/include/asm-cris/bitops.h
+++ b/include/asm-cris/bitops.h
@@ -141,7 +141,9 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 	return retval;
 }
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 /*
  * Since we define it "external", it collides with the built-in
diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h
index f8560ed..509d20b 100644
--- a/include/asm-frv/bitops.h
+++ b/include/asm-frv/bitops.h
@@ -303,6 +303,7 @@ int __ilog2_u64(u64 n)
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/hweight.h>
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/ext2-non-atomic.h>
 
 #define ext2_set_bit_atomic(lock,nr,addr)	test_and_set_bit  ((nr) ^ 0x18, (addr))
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index 1f9d991..c741462 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -10,7 +10,9 @@
  */
 
 #include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/fls.h>
diff --git a/include/asm-generic/bitops/atomic-long.h b/include/asm-generic/bitops/atomic-long.h
new file mode 100644
index 0000000..ec8ae3b
--- /dev/null
+++ b/include/asm-generic/bitops/atomic-long.h
@@ -0,0 +1,110 @@
+#ifndef _ASM_GENERIC_BITOPS_ATOMIC_LONG_H_
+#define _ASM_GENERIC_BITOPS_ATOMIC_LONG_H_
+
+#include <asm/types.h>
+
+/*
+ * long_set_bit - Atomically set a bit in a long in memory
+ * @nr: the bit to set
+ * @addr: the address of the long
+ *
+ * This function is atomic and may not be reordered.  See __long_set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note: there are no guarantees that this function will not be reordered
+ * on non-x86 architectures, so if you are writing portable code,
+ * make sure not to rely on its reordering guarantees.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline void long_set_bit(int nr, volatile unsigned long *addr)
+{
+	set_bit(nr, addr);
+}
+
+/*
+ * long_clear_bit - Clears a bit in a long in memory
+ * @nr: Bit to clear
+ * @addr: Address of long variable
+ *
+ * long_clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline void long_clear_bit(int nr, volatile unsigned long *addr)
+{
+	clear_bit(nr, addr);
+}
+
+/*
+ * long_change_bit - Toggle a bit in a long in memory
+ * @nr: Bit to change
+ * @addr: Address of long variable
+ *
+ * long_change_bit() is atomic and may not be reordered on x86; it may
+ * be reordered on other architectures.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline void long_change_bit(int nr, volatile unsigned long *addr)
+{
+	change_bit(nr, addr);
+}
+
+/*
+ * long_test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address of long in memory
+ *
+ * This operation is atomic and, on x86, cannot be reordered; it may be
+ * reordered on other architectures.
+ * It also implies a memory barrier.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline int long_test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	return test_and_set_bit(nr, addr);
+}
+
+/*
+ * long_test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and, on x86, cannot be reordered; it may be
+ * reordered on other architectures.
+ * It also implies a memory barrier.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline int long_test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	return test_and_clear_bit(nr, addr);
+}
+
+/*
+ * long_test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline int long_test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+	return test_and_change_bit(nr, addr);
+}
+
+#endif /* _ASM_GENERIC_BITOPS_ATOMIC_LONG_H_ */
diff --git a/include/asm-generic/bitops/non-atomic-long.h b/include/asm-generic/bitops/non-atomic-long.h
new file mode 100644
index 0000000..d26a39a
--- /dev/null
+++ b/include/asm-generic/bitops/non-atomic-long.h
@@ -0,0 +1,119 @@
+/*
+ * Bitops that only work on a single long instead of an array of longs like
+ * their more generic non-long_* relatives, which allows better code
+ * optimization.  For a bit number argument < BITS_PER_LONG the two variants
+ * behave identically; for a number >= BITS_PER_LONG the operation of the
+ * long_* variants is undefined.
+ */
+#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_LONG_H_
+#define _ASM_GENERIC_BITOPS_NON_ATOMIC_LONG_H_
+
+#include <asm/types.h>
+
+#define LONG_BITOP_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+
+/**
+ * __long_set_bit - Set a bit in a long in memory
+ * @nr: the bit to set
+ * @addr: the address of the long variable.
+ *
+ * Unlike long_set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __long_set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = LONG_BITOP_MASK(nr);
+	unsigned long *p = (unsigned long *) addr;
+
+	*p |= mask;
+}
+
+static inline void __long_clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = LONG_BITOP_MASK(nr);
+	unsigned long *p = (unsigned long *) addr;
+
+	*p &= ~mask;
+}
+
+/**
+ * __long_change_bit - Toggle a bit in a long in memory
+ * @nr: the bit to change
+ * @addr: the address of the long variable
+ *
+ * Unlike long_change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __long_change_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = LONG_BITOP_MASK(nr);
+	unsigned long *p = (unsigned long *) addr;
+
+	*p ^= mask;
+}
+
+/**
+ * __long_test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address of long variable
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __long_test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = LONG_BITOP_MASK(nr);
+	unsigned long *p = (unsigned long *) addr;
+	unsigned long old = *p;
+
+	*p = old | mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * __long_test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address of long variable in memory
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __long_test_and_clear_bit(int nr,
+					    volatile unsigned long *addr)
+{
+	unsigned long mask = LONG_BITOP_MASK(nr);
+	unsigned long *p = (unsigned long *) addr;
+	unsigned long old = *p;
+
+	*p = old & ~mask;
+	return (old & mask) != 0;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __long_test_and_change_bit(int nr,
+					     volatile unsigned long *addr)
+{
+	unsigned long mask = LONG_BITOP_MASK(nr);
+	unsigned long *p = (unsigned long *) addr;
+	unsigned long old = *p;
+
+	*p = old ^ mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * long_test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+
+static inline int long_test_bit(int nr, const volatile unsigned long *addr)
+{
+	return 1UL & (*addr >> nr);
+}
+
+#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_LONG_H_ */
diff --git a/include/asm-h8300/bitops.h b/include/asm-h8300/bitops.h
index d76299c..ba6d3f5 100644
--- a/include/asm-h8300/bitops.h
+++ b/include/asm-h8300/bitops.h
@@ -194,6 +194,7 @@ static __inline__ unsigned long __ffs(unsigned long word)
 #include <asm-generic/bitops/find.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/ext2-non-atomic.h>
 #include <asm-generic/bitops/ext2-atomic.h>
 #include <asm-generic/bitops/minix.h>
diff --git a/include/asm-i386/bitops.h b/include/asm-i386/bitops.h
index a20fe98..1079ba8 100644
--- a/include/asm-i386/bitops.h
+++ b/include/asm-i386/bitops.h
@@ -58,6 +58,8 @@ static inline void __set_bit(int nr, volatile unsigned long * addr)
 		:"Ir" (nr));
 }
 
+#define __long_set_bit(nr,addr) __set_bit((nr), (addr))
+
 /**
  * clear_bit - Clears a bit in memory
  * @nr: Bit to clear
@@ -83,6 +85,9 @@ static inline void __clear_bit(int nr, volatile unsigned long * addr)
 		:"+m" (ADDR)
 		:"Ir" (nr));
 }
+
+#define __long_clear_bit(nr,addr) __clear_bit((nr), (addr))
+
 #define smp_mb__before_clear_bit()	barrier()
 #define smp_mb__after_clear_bit()	barrier()
 
@@ -103,6 +108,8 @@ static inline void __change_bit(int nr, volatile unsigned long * addr)
 		:"Ir" (nr));
 }
 
+#define __long_change_bit(nr,addr) __change_bit((nr), (addr))
+
 /**
  * change_bit - Toggle a bit in memory
  * @nr: Bit to change
@@ -161,6 +168,8 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long * addr)
 	return oldbit;
 }
 
+#define __long_test_and_set_bit(nr,addr) __test_and_set_bit((nr), (addr))
+
 /**
  * test_and_clear_bit - Clear a bit and return its old value
  * @nr: Bit to clear
@@ -201,6 +210,8 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 	return oldbit;
 }
 
+#define __long_test_and_clear_bit(nr,addr) __test_and_clear_bit((nr), (addr))
+
 /* WARNING: non atomic and it can be reordered! */
 static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
 {
@@ -213,6 +224,8 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
 	return oldbit;
 }
 
+#define __long_test_and_change_bit(nr,addr) __test_and_change_bit((nr), (addr))
+
 /**
  * test_and_change_bit - Change a bit and return its old value
  * @nr: Bit to change
@@ -262,6 +275,10 @@ static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
  constant_test_bit((nr),(addr)) : \
  variable_test_bit((nr),(addr)))
 
+#define long_test_bit(nr,addr) test_bit((nr), (addr))
+
+#include <asm-generic/bitops/atomic-long.h>
+
 #undef ADDR
 
 /**
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 6cc517e..7c97528 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -279,6 +279,8 @@ test_bit (int nr, const volatile void *addr)
 	return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31));
 }
 
+#include <asm-generic/bitops/atomic-long.h>
+
 /**
  * ffz - find the first zero bit in a long word
  * @x: The long word to find the bit in
diff --git a/include/asm-m32r/bitops.h b/include/asm-m32r/bitops.h
index 66ab672..20ecc60 100644
--- a/include/asm-m32r/bitops.h
+++ b/include/asm-m32r/bitops.h
@@ -243,7 +243,9 @@ static __inline__ int test_and_change_bit(int nr, volatile void * addr)
 	return (oldbit != 0);
 }
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 #include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/fls.h>
diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h
index 1a61fdb..86d67ba 100644
--- a/include/asm-m68k/bitops.h
+++ b/include/asm-m68k/bitops.h
@@ -172,6 +172,8 @@ static inline int test_bit(int nr, const unsigned long *vaddr)
 	return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0;
 }
 
+#include <asm-generic/bitops/atomic-long.h>
+
 static inline int find_first_zero_bit(const unsigned long *vaddr,
 				      unsigned size)
 {
diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h
index 7d6075d..d8f9a20 100644
--- a/include/asm-m68knommu/bitops.h
+++ b/include/asm-m68knommu/bitops.h
@@ -158,6 +158,7 @@ static __inline__ int __test_bit(int nr, const volatile unsigned long * addr)
  __constant_test_bit((nr),(addr)) : \
  __test_bit((nr),(addr)))
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/find.h>
 #include <asm-generic/bitops/hweight.h>
 
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index 148bc79..210fef4 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -51,16 +51,16 @@
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
+static inline void long_set_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-	unsigned short bit = nr & SZLONG_MASK;
+	unsigned long *m = (unsigned long *) addr;
+	unsigned short bit = nr;
 	unsigned long temp;
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		__asm__ __volatile__(
 		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1			# set_bit	\n"
+		"1:	" __LL "%0, %1			# long_set_bit	\n"
 		"	or	%0, %2					\n"
 		"	" __SC	"%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
@@ -70,7 +70,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 #ifdef CONFIG_CPU_MIPSR2
 	} else if (__builtin_constant_p(bit)) {
 		__asm__ __volatile__(
-		"1:	" __LL "%0, %1			# set_bit	\n"
+		"1:	" __LL "%0, %1			# long_set_bit	\n"
 		"	" __INS "%0, %4, %2, 1				\n"
 		"	" __SC "%0, %1					\n"
 		"	beqz	%0, 2f					\n"
@@ -83,7 +83,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__(
 		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1			# set_bit	\n"
+		"1:	" __LL "%0, %1			# long_set_bit	\n"
 		"	or	%0, %2					\n"
 		"	" __SC	"%0, %1					\n"
 		"	beqz	%0, 2f					\n"
@@ -98,7 +98,6 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 		unsigned long mask;
 		unsigned long flags;
 
-		a += nr >> SZLONG_LOG;
 		mask = 1UL << bit;
 		raw_local_irq_save(flags);
 		*a |= mask;
@@ -106,6 +105,15 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 	}
 }
 
+static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+	unsigned short bit = nr & SZLONG_MASK;
+
+	long_set_bit(bit, m);
+}
+
+
 /*
  * clear_bit - Clears a bit in memory
  * @nr: Bit to clear
@@ -116,16 +124,16 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
  * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
  * in order to ensure changes are visible on other processors.
  */
-static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
+static inline void long_clear_bit(unsigned long nr, volatile unsigned long *addr)
 {
-	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
-	unsigned short bit = nr & SZLONG_MASK;
+	unsigned long *m = (unsigned long *) addr;
+	unsigned short bit = nr;
 	unsigned long temp;
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
 		__asm__ __volatile__(
 		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1			# clear_bit	\n"
+		"1:	" __LL "%0, %1			# long_clear_bit\n"
 		"	and	%0, %2					\n"
 		"	" __SC "%0, %1					\n"
 		"	beqzl	%0, 1b					\n"
@@ -135,7 +143,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 #ifdef CONFIG_CPU_MIPSR2
 	} else if (__builtin_constant_p(bit)) {
 		__asm__ __volatile__(
-		"1:	" __LL "%0, %1			# clear_bit	\n"
+		"1:	" __LL "%0, %1			# long_clear_bit\n"
 		"	" __INS "%0, $0, %2, 1				\n"
 		"	" __SC "%0, %1					\n"
 		"	beqz	%0, 2f					\n"
@@ -148,7 +156,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 	} else if (cpu_has_llsc) {
 		__asm__ __volatile__(
 		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1			# clear_bit	\n"
+		"1:	" __LL "%0, %1			# long_clear_bit\n"
 		"	and	%0, %2					\n"
 		"	" __SC "%0, %1					\n"
 		"	beqz	%0, 2f					\n"
@@ -163,7 +171,6 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 		unsigned long mask;
 		unsigned long flags;
 
-		a += nr >> SZLONG_LOG;
 		mask = 1UL << bit;
 		raw_local_irq_save(flags);
 		*a &= ~mask;
@@ -171,6 +178,14 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 	}
 }
 
+static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+	unsigned short bit = nr & SZLONG_MASK;
+
+	long_clear_bit(bit, m);
+}
+
 /*
  * change_bit - Toggle a bit in memory
  * @nr: Bit to change
@@ -180,37 +195,38 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
  * Note that @nr may be almost arbitrarily large; this function is not
  * restricted to acting on a single-word quantity.
  */
-static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
+static inline void long_change_bit(unsigned long nr,
+				   volatile unsigned long *addr)
 {
-	unsigned short bit = nr & SZLONG_MASK;
+	unsigned short bit = nr;
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long *m = (unsigned long *) addr;
 		unsigned long temp;
 
 		__asm__ __volatile__(
-		"	.set	mips3				\n"
-		"1:	" __LL "%0, %1		# change_bit	\n"
-		"	xor	%0, %2				\n"
-		"	" __SC	"%0, %1				\n"
-		"	beqzl	%0, 1b				\n"
-		"	.set	mips0				\n"
+		"	.set	mips3					\n"
+		"1:	" __LL "%0, %1		# long_change_bit	\n"
+		"	xor	%0, %2					\n"
+		"	" __SC	"%0, %1					\n"
+		"	beqzl	%0, 1b					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << bit), "m" (*m));
 	} else if (cpu_has_llsc) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long *m = (unsigned long *) addr;
 		unsigned long temp;
 
 		__asm__ __volatile__(
-		"	.set	mips3				\n"
-		"1:	" __LL "%0, %1		# change_bit	\n"
-		"	xor	%0, %2				\n"
-		"	" __SC	"%0, %1				\n"
-		"	beqz	%0, 2f				\n"
-		"	.subsection 2				\n"
-		"2:	b	1b				\n"
-		"	.previous				\n"
-		"	.set	mips0				\n"
+		"	.set	mips3					\n"
+		"1:	" __LL "%0, %1		# long_change_bit	\n"
+		"	xor	%0, %2					\n"
+		"	" __SC	"%0, %1					\n"
+		"	beqz	%0, 2f					\n"
+		"	.subsection 2					\n"
+		"2:	b	1b					\n"
+		"	.previous					\n"
+		"	.set	mips0					\n"
 		: "=&r" (temp), "=m" (*m)
 		: "ir" (1UL << bit), "m" (*m));
 	} else {
@@ -218,7 +234,6 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 		unsigned long mask;
 		unsigned long flags;
 
-		a += nr >> SZLONG_LOG;
 		mask = 1UL << bit;
 		raw_local_irq_save(flags);
 		*a ^= mask;
@@ -226,6 +241,15 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 	}
 }
 
+static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+	unsigned short bit = nr & SZLONG_MASK;
+
+	long_change_bit(bit, m);
+}
+
+
 /*
  * test_and_set_bit - Set a bit and return its old value
  * @nr: Bit to set
@@ -234,19 +258,19 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_set_bit(unsigned long nr,
+static inline int long_test_and_set_bit(unsigned long nr,
 	volatile unsigned long *addr)
 {
-	unsigned short bit = nr & SZLONG_MASK;
+	unsigned short bit = nr;
 	unsigned long res;
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long *m = (unsigned long *) addr;
 		unsigned long temp;
 
 		__asm__ __volatile__(
 		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
+		"1:	" __LL "%0, %1		# long_test_and_set_bit	\n"
 		"	or	%2, %0, %3				\n"
 		"	" __SC	"%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
@@ -256,14 +280,14 @@ static inline int test_and_set_bit(unsigned long nr,
 		: "r" (1UL << bit), "m" (*m)
 		: "memory");
 	} else if (cpu_has_llsc) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long *m = (unsigned long *) addr;
 		unsigned long temp;
 
 		__asm__ __volatile__(
 		"	.set	push					\n"
 		"	.set	noreorder				\n"
 		"	.set	mips3					\n"
-		"1:	" __LL "%0, %1		# test_and_set_bit	\n"
+		"1:	" __LL "%0, %1		# long_test_and_set_bit	\n"
 		"	or	%2, %0, %3				\n"
 		"	" __SC	"%2, %1					\n"
 		"	beqz	%2, 2f					\n"
@@ -281,7 +305,6 @@ static inline int test_and_set_bit(unsigned long nr,
 		unsigned long mask;
 		unsigned long flags;
 
-		a += nr >> SZLONG_LOG;
 		mask = 1UL << bit;
 		raw_local_irq_save(flags);
 		res = (mask & *a);
@@ -294,6 +317,15 @@ static inline int test_and_set_bit(unsigned long nr,
 	return res != 0;
 }
 
+static inline int test_and_set_bit(unsigned long nr,
+	volatile unsigned long *addr)
+{
+	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+	unsigned short bit = nr & SZLONG_MASK;
+
+	return long_test_and_set_bit(bit, m);
+}
+
 /*
  * test_and_clear_bit - Clear a bit and return its old value
  * @nr: Bit to clear
@@ -302,19 +334,19 @@ static inline int test_and_set_bit(unsigned long nr,
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_clear_bit(unsigned long nr,
+static inline int long_test_and_clear_bit(unsigned long nr,
 	volatile unsigned long *addr)
 {
-	unsigned short bit = nr & SZLONG_MASK;
+	unsigned short bit = nr;
 	unsigned long res;
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long *m = (unsigned long *) addr;
 		unsigned long temp;
 
 		__asm__ __volatile__(
 		"	.set	mips3					\n"
-		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
+		"1:	" __LL	"%0, %1		# long_test_and_clear_bit\n"
 		"	or	%2, %0, %3				\n"
 		"	xor	%2, %3					\n"
 		"	" __SC 	"%2, %1					\n"
@@ -326,11 +358,11 @@ static inline int test_and_clear_bit(unsigned long nr,
 		: "memory");
 #ifdef CONFIG_CPU_MIPSR2
 	} else if (__builtin_constant_p(nr)) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long *m = (unsigned long *) addr;
 		unsigned long temp;
 
 		__asm__ __volatile__(
-		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
+		"1:	" __LL	"%0, %1		# long_test_and_clear_bit\n"
 		"	" __EXT "%2, %0, %3, 1				\n"
 		"	" __INS	"%0, $0, %3, 1				\n"
 		"	" __SC 	"%0, %1					\n"
@@ -343,14 +375,14 @@ static inline int test_and_clear_bit(unsigned long nr,
 		: "memory");
 #endif
 	} else if (cpu_has_llsc) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long *m = (unsigned long *) addr;
 		unsigned long temp;
 
 		__asm__ __volatile__(
 		"	.set	push					\n"
 		"	.set	noreorder				\n"
 		"	.set	mips3					\n"
-		"1:	" __LL	"%0, %1		# test_and_clear_bit	\n"
+		"1:	" __LL	"%0, %1		# long_test_and_clear_bit\n"
 		"	or	%2, %0, %3				\n"
 		"	xor	%2, %3					\n"
 		"	" __SC 	"%2, %1					\n"
@@ -369,7 +401,6 @@ static inline int test_and_clear_bit(unsigned long nr,
 		unsigned long mask;
 		unsigned long flags;
 
-		a += nr >> SZLONG_LOG;
 		mask = 1UL << bit;
 		raw_local_irq_save(flags);
 		res = (mask & *a);
@@ -382,6 +413,15 @@ static inline int test_and_clear_bit(unsigned long nr,
 	return res != 0;
 }
 
+static inline int test_and_clear_bit(unsigned long nr,
+	volatile unsigned long *addr)
+{
+	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+	unsigned short bit = nr & SZLONG_MASK;
+
+	return long_test_and_clear_bit(bit, m);
+}
+
 /*
  * test_and_change_bit - Change a bit and return its old value
  * @nr: Bit to change
@@ -390,19 +430,19 @@ static inline int test_and_clear_bit(unsigned long nr,
  * This operation is atomic and cannot be reordered.
  * It also implies a memory barrier.
  */
-static inline int test_and_change_bit(unsigned long nr,
+static inline int long_test_and_change_bit(unsigned long nr,
 	volatile unsigned long *addr)
 {
-	unsigned short bit = nr & SZLONG_MASK;
+	unsigned short bit = nr;
 	unsigned long res;
 
 	if (cpu_has_llsc && R10000_LLSC_WAR) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long *m = (unsigned long *) addr;
 		unsigned long temp;
 
 		__asm__ __volatile__(
 		"	.set	mips3					\n"
-		"1:	" __LL	"%0, %1		# test_and_change_bit	\n"
+		"1:	" __LL	"%0, %1		# long_test_and_change_bit\n"
 		"	xor	%2, %0, %3				\n"
 		"	" __SC	"%2, %1					\n"
 		"	beqzl	%2, 1b					\n"
@@ -412,14 +452,14 @@ static inline int test_and_change_bit(unsigned long nr,
 		: "r" (1UL << bit), "m" (*m)
 		: "memory");
 	} else if (cpu_has_llsc) {
-		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+		unsigned long *m = (unsigned long *) addr;
 		unsigned long temp;
 
 		__asm__ __volatile__(
 		"	.set	push					\n"
 		"	.set	noreorder				\n"
 		"	.set	mips3					\n"
-		"1:	" __LL	"%0, %1		# test_and_change_bit	\n"
+		"1:	" __LL	"%0, %1		# long_test_and_change_bit\n"
 		"	xor	%2, %0, %3				\n"
 		"	" __SC	"\t%2, %1				\n"
 		"	beqz	%2, 2f					\n"
@@ -437,7 +477,6 @@ static inline int test_and_change_bit(unsigned long nr,
 		unsigned long mask;
 		unsigned long flags;
 
-		a += nr >> SZLONG_LOG;
 		mask = 1UL << bit;
 		raw_local_irq_save(flags);
 		res = (mask & *a);
@@ -450,7 +489,17 @@ static inline int test_and_change_bit(unsigned long nr,
 	return res != 0;
 }
 
+static inline int test_and_change_bit(unsigned long nr,
+	volatile unsigned long *addr)
+{
+	unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+	unsigned short bit = nr & SZLONG_MASK;
+
+	return long_test_and_change_bit(bit, m);
+}
+
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 /*
  * Return the bit position (0..63) of the most significant 1 bit in a word
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
index 015cb0d..8a091cd 100644
--- a/include/asm-parisc/bitops.h
+++ b/include/asm-parisc/bitops.h
@@ -108,7 +108,9 @@ static __inline__ int test_and_change_bit(int nr, volatile unsigned long * addr)
 	return (oldbit & mask) ? 1 : 0;
 }
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index 8144a27..032b39e 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -183,7 +183,9 @@ static __inline__ void set_bits(unsigned long mask, unsigned long *addr)
 	: "cc");
 }
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 /*
  * Return the zero-based bit position (LE, not IBM bit numbering) of
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index f79c9b7..a52679a 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -435,6 +435,8 @@ __constant_test_bit(unsigned long nr, const volatile unsigned long *addr) {
  __constant_test_bit((nr),(addr)) : \
  __test_bit((nr),(addr)) )
 
+#include <asm-generic/bitops/atomic-long.h>
+
 /*
  * ffz = Find First Zero in word. Undefined if no zero exists,
  * so code should check against ~0UL first..
diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h
index 1c16792..7b8c9b7 100644
--- a/include/asm-sh/bitops.h
+++ b/include/asm-sh/bitops.h
@@ -98,7 +98,9 @@ static inline int test_and_change_bit(int nr, volatile void * addr)
 	return retval;
 }
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 static inline unsigned long ffz(unsigned long word)
 {
diff --git a/include/asm-sh64/bitops.h b/include/asm-sh64/bitops.h
index f3bdcdb..09c8824 100644
--- a/include/asm-sh64/bitops.h
+++ b/include/asm-sh64/bitops.h
@@ -109,7 +109,9 @@ static __inline__ int test_and_change_bit(int nr, volatile void * addr)
 	return retval;
 }
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 static __inline__ unsigned long ffz(unsigned long word)
 {
diff --git a/include/asm-sparc/bitops.h b/include/asm-sparc/bitops.h
index 329e696..1aa4cbd 100644
--- a/include/asm-sparc/bitops.h
+++ b/include/asm-sparc/bitops.h
@@ -84,7 +84,9 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 	(void) ___change_bit(ADDR, mask);
 }
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 #define smp_mb__before_clear_bit()	do { } while(0)
 #define smp_mb__after_clear_bit()	do { } while(0)
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index 3d5e1af..9eacf61 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -17,7 +17,9 @@ extern void set_bit(unsigned long nr, volatile unsigned long *addr);
 extern void clear_bit(unsigned long nr, volatile unsigned long *addr);
 extern void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 #ifdef CONFIG_SMP
 #define smp_mb__before_clear_bit()	membar_storeload_loadload()
diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h
index 1fa99ba..0810259 100644
--- a/include/asm-v850/bitops.h
+++ b/include/asm-v850/bitops.h
@@ -138,6 +138,7 @@ static inline int __test_bit (int nr, const void *addr)
 #define smp_mb__before_clear_bit()	barrier ()
 #define smp_mb__after_clear_bit()	barrier ()
 
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/fls64.h>
diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h
index d4dbbe5..1fb5315 100644
--- a/include/asm-x86_64/bitops.h
+++ b/include/asm-x86_64/bitops.h
@@ -254,6 +254,8 @@ static __inline__ int variable_test_bit(int nr, volatile const void * addr)
 
 #undef ADDR
 
+#include <asm-generic/bitops/atomic-long.h>
+
 extern long find_first_zero_bit(const unsigned long * addr, unsigned long size);
 extern long find_next_zero_bit (const unsigned long * addr, long size, long offset);
 extern long find_first_bit(const unsigned long * addr, unsigned long size);
diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h
index 1c1e0d9..1754bac 100644
--- a/include/asm-xtensa/bitops.h
+++ b/include/asm-xtensa/bitops.h
@@ -27,7 +27,9 @@
 #define smp_mb__after_clear_bit()	barrier()
 
 #include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/atomic-long.h>
 #include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
 
 #if XCHAL_HAVE_NSA
 
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 23f5514..3147f21 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -91,13 +91,19 @@ extern cpumask_t _unused_cpumask_arg_;
 #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
 static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
 {
-	set_bit(cpu, dstp->bits);
+	if (NR_CPUS <= BITS_PER_LONG)
+		long_set_bit(cpu, dstp->bits);
+	else
+		set_bit(cpu, dstp->bits);
 }
 
 #define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
 static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
 {
-	clear_bit(cpu, dstp->bits);
+	if (NR_CPUS <= BITS_PER_LONG)
+		long_clear_bit(cpu, dstp->bits);
+	else
+		clear_bit(cpu, dstp->bits);
 }
 
 #define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
@@ -113,12 +119,25 @@ static inline void __cpus_clear(cpumask_t *dstp, int nbits)
 }
 
 /* No static inline type checking - see Subtlety (1) above. */
-#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
+#define cpu_isset(cpu, cpumask)						\
+({									\
+	int __res;							\
+									\
+	if (NR_CPUS <= BITS_PER_LONG)					\
+		__res = long_test_bit((cpu), (cpumask).bits);		\
+	else								\
+		__res = test_bit((cpu), (cpumask).bits);		\
+									\
+	__res;								\
+})
 
 #define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
 static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
 {
-	return test_and_set_bit(cpu, addr->bits);
+	if (NR_CPUS <= BITS_PER_LONG)
+		return long_test_and_set_bit(cpu, addr->bits);
+	else
+		return test_and_set_bit(cpu, addr->bits);
 }
 
 #define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
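
For illustration (not part of the patch; do_something() is a made-up
stand-in), a caller like the following now reduces to single-word bit
operations whenever NR_CPUS <= BITS_PER_LONG:

	cpumask_t mask = CPU_MASK_NONE;

	cpu_set(smp_processor_id(), mask);	/* becomes long_set_bit() */
	if (cpu_isset(0, mask))			/* becomes long_test_bit() */
		do_something();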


* Re: Optimize cpumask functions for SMPs with < BITS_PER_LONG processors
From: Andi Kleen @ 2007-09-28 17:34 UTC
  To: Ralf Baechle; +Cc: linux-arch

On Tuesday 25 September 2007 17:52:00 Ralf Baechle wrote:
> When debugging a kernel using a logic analyzer (!) a colleague recently
> noticed that the <linux/cpumask.h> functions carry a relatively high
> overhead because they are based on the generic bitops, which support
> arbitrary-size bitfields.  Here's the chainsaw edition of a patch to
> optimize this for CONFIG_NR_CPUS <= BITS_PER_LONG.  Comments?

The right thing to test is not CONFIG_NR_CPUS, but rather to do
__builtin_constant_p(x) && (x) <= BITS_PER_LONG ? fast case : external call
in find_*_bit().
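
A sketch of that dispatch (illustrative; my_find_first_bit() is a
made-up name, not the actual x86-64 implementation):

	static inline unsigned long my_find_first_bit(const unsigned long *addr,
						      unsigned long size)
	{
		if (__builtin_constant_p(size) && size <= BITS_PER_LONG) {
			/* compile-time fast path; assumes 0 < size */
			unsigned long val = *addr & (~0UL >> (BITS_PER_LONG - size));

			return val ? __ffs(val) : size;
		}
		return find_first_bit(addr, size);	/* out-of-line generic path */
	}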

x86-64 has already done this for some time.  But one issue is that
the cpumask walk functions currently do (n = find_*_bit()) >= maxbit ? maxbit : n,
which also creates more overhead because some architectures get this
wrong (including x86-64, I must admit).
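
The clamp in question, roughly as the cpumask walkers did it at the
time (a sketch from memory, not a verbatim quote):

	#define first_cpu(src) __first_cpu(&(src), NR_CPUS)
	static inline int __first_cpu(const cpumask_t *srcp, int nbits)
	{
		/* clamp the "no bit found" return value to nbits */
		return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
	}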

-Andi

