From: Ralf Baechle <ralf@linux-mips.org>
To: linux-arch@vger.kernel.org
Subject: Optimize cpumask functions for SMPs with < BITS_PER_LONG processors
Date: Tue, 25 Sep 2007 16:52:00 +0100 [thread overview]
Message-ID: <20070925155200.GA7342@linux-mips.org> (raw)
When debugging a kernel using a logic analyzer (!) a colleague recently
noticed that because the <linux/cpumask.h> functions are based on the
generic bitops, which support arbitrary-size bitfields, we had a relatively
high overhead resulting from this. Here's the chainsaw edition of a patch
to optimize this for CONFIG_NR_CPUS <= BITS_PER_LONG. Comments?
Ralf
From: Ralf Baechle <ralf@linux-mips.org>
Date: Tue, 31 Jul 2007 13:03:16 +0100
[PATCH] Optimize bitop code for single long bitfields such as cpumask_t on small SMP.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 9e71201..87e207e 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -236,6 +236,8 @@ test_bit(int nr, const volatile void * addr)
return (1UL & (((const int *) addr)[nr >> 5] >> (nr & 31))) != 0UL;
}
+#include <asm-generic/bitops/atomic-long.h>
+
/*
* ffz = Find First Zero in word. Undefined if no zero exists,
* so code should check against ~0UL first..
diff --git a/include/asm-arm/bitops.h b/include/asm-arm/bitops.h
index b41831b..98dcd15 100644
--- a/include/asm-arm/bitops.h
+++ b/include/asm-arm/bitops.h
@@ -117,7 +117,9 @@ ____atomic_test_and_change_bit(unsigned int bit, volatile unsigned long *p)
return res & mask;
}
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
/*
* A note about Endian-ness.
diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h
index 5299f8c..784d60b 100644
--- a/include/asm-avr32/bitops.h
+++ b/include/asm-avr32/bitops.h
@@ -230,7 +230,9 @@ static inline int test_and_change_bit(int nr, volatile void * addr)
return (old & mask) != 0;
}
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
/* Find First bit Set */
static inline unsigned long __ffs(unsigned long word)
diff --git a/include/asm-blackfin/bitops.h b/include/asm-blackfin/bitops.h
index 27c2d0e..2fec38f 100644
--- a/include/asm-blackfin/bitops.h
+++ b/include/asm-blackfin/bitops.h
@@ -11,6 +11,7 @@
#ifdef __KERNEL__
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/ffs.h>
#include <asm-generic/bitops/__ffs.h>
#include <asm-generic/bitops/sched.h>
diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h
index a569065..2832ebd 100644
--- a/include/asm-cris/bitops.h
+++ b/include/asm-cris/bitops.h
@@ -141,7 +141,9 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
return retval;
}
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
/*
* Since we define it "external", it collides with the built-in
diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h
index f8560ed..509d20b 100644
--- a/include/asm-frv/bitops.h
+++ b/include/asm-frv/bitops.h
@@ -303,6 +303,7 @@ int __ilog2_u64(u64 n)
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/ext2-non-atomic.h>
#define ext2_set_bit_atomic(lock,nr,addr) test_and_set_bit ((nr) ^ 0x18, (addr))
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index 1f9d991..c741462 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -10,7 +10,9 @@
*/
#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
#include <asm-generic/bitops/__ffs.h>
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/fls.h>
diff --git a/include/asm-generic/bitops/atomic-long.h b/include/asm-generic/bitops/atomic-long.h
new file mode 100644
index 0000000..ec8ae3b
--- /dev/null
+++ b/include/asm-generic/bitops/atomic-long.h
@@ -0,0 +1,112 @@
+#ifndef _ASM_GENERIC_BITOPS_ATOMIC_LONG_H_
+#define _ASM_GENERIC_BITOPS_ATOMIC_LONG_H_
+
+#include <asm/types.h>
+
+/*
+ * long_set_bit - Atomically set a bit in memory long
+ * @nr: the bit to set
+ * @addr: the address of the long
+ *
+ * This function is atomic and may not be reordered. See __long_set_bit()
+ * if you do not require the atomic guarantees.
+ *
+ * Note: there are no guarantees that this function will not be reordered
+ * on non x86 architectures, so if you are writing portable code,
+ * make sure not to rely on its reordering guarantees.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline void long_set_bit(int nr, volatile unsigned long *addr)
+{
+ set_bit(nr, addr);
+}
+
+/*
+ * long_clear_bit - Clears a bit in memory long
+ * @nr: Bit to clear
+ * @addr: Address of long variable
+ *
+ * long_clear_bit() is atomic and may not be reordered. However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline void long_clear_bit(int nr, volatile unsigned long *addr)
+{
+ clear_bit(nr, addr);
+}
+
+/*
+ * long_change_bit - Toggle a bit in memory long
+ * @nr: Bit to change
+ * @addr: Address of long variable
+ *
+ * long_change_bit() is atomic. On x86 it also may not be reordered;
+ * other architectures give no such ordering guarantee, so if you are
+ * writing portable code, make sure not to rely on its reordering
+ * guarantees.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline void long_change_bit(int nr, volatile unsigned long *addr)
+{
+ change_bit(nr, addr);
+}
+
+/*
+ * long_test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address of long in memory
+ *
+ * This operation is atomic and, on x86, cannot be reordered.
+ * It may be reordered on architectures other than x86.
+ * It also implies a memory barrier.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline int long_test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ return test_and_set_bit(nr, addr);
+}
+
+/*
+ * long_test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address of long in memory
+ *
+ * This operation is atomic and, on x86, cannot be reordered.
+ * It may be reordered on architectures other than x86.
+ * It also implies a memory barrier.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline int long_test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+ return test_and_clear_bit(nr, addr);
+}
+
+/*
+ * long_test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address of long in memory
+ *
+ * This operation is atomic and cannot be reordered.
+ * It also implies a memory barrier.
+ *
+ * Note that @nr must be less than BITS_PER_LONG; this function is
+ * restricted to acting on a single-word quantity.
+ */
+static inline int long_test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+ return test_and_change_bit(nr, addr);
+}
+
+#endif /* _ASM_GENERIC_BITOPS_ATOMIC_LONG_H_ */
diff --git a/include/asm-generic/bitops/non-atomic-long.h b/include/asm-generic/bitops/non-atomic-long.h
new file mode 100644
index 0000000..d26a39a
--- /dev/null
+++ b/include/asm-generic/bitops/non-atomic-long.h
@@ -0,0 +1,119 @@
+/*
+ * Bitops that only work on a single long instead of an array like their more
+ * generic non-long_* relatives, which allows some better code optimization.
+ * For a bit number argument < BITS_PER_LONG the two variants are identical;
+ * for numbers >= BITS_PER_LONG the operation of the long_* variants is
+ * undefined.
+ */
+#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_LONG_H_
+#define _ASM_GENERIC_BITOPS_NON_ATOMIC_LONG_H_
+
+#include <asm/types.h>
+
+#define LONG_BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+
+/**
+ * __long_set_bit - Set a bit in memory long
+ * @nr: the bit to set
+ * @addr: the address of the long variable.
+ *
+ * Unlike long_set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __long_set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = LONG_BITOP_MASK(nr);
+ unsigned long *p = (unsigned long *) addr;
+
+ *p |= mask;
+}
+
+static inline void __long_clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = LONG_BITOP_MASK(nr);
+ unsigned long *p = (unsigned long *) addr;
+
+ *p &= ~mask;
+}
+
+/**
+ * __long_change_bit - Toggle a bit in memory long
+ * @nr: the bit to change
+ * @addr: the address of the long variable
+ *
+ * Unlike long_change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __long_change_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = LONG_BITOP_MASK(nr);
+ unsigned long *p = (unsigned long *) addr;
+
+ *p ^= mask;
+}
+
+/**
+ * __long_test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address of long variable
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __long_test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = LONG_BITOP_MASK(nr);
+ unsigned long *p = (unsigned long *) addr;
+ unsigned long old = *p;
+
+ *p = old | mask;
+ return (old & mask) != 0;
+}
+
+/*
+ * __long_test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address of long variable in memory
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __long_test_and_clear_bit(int nr,
+ volatile unsigned long *addr)
+{
+ unsigned long mask = LONG_BITOP_MASK(nr);
+ unsigned long *p = (unsigned long *) addr;
+ unsigned long old = *p;
+
+ *p = old & ~mask;
+ return (old & mask) != 0;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __long_test_and_change_bit(int nr,
+ volatile unsigned long *addr)
+{
+ unsigned long mask = LONG_BITOP_MASK(nr);
+ unsigned long *p = (unsigned long *) addr;
+ unsigned long old = *p;
+
+ *p = old ^ mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * long_test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address of the long variable
+ */
+
+static inline int long_test_bit(int nr, const volatile unsigned long *addr)
+{
+ return 1UL & (*addr >> nr);
+}
+
+#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_LONG_H_ */
diff --git a/include/asm-h8300/bitops.h b/include/asm-h8300/bitops.h
index d76299c..ba6d3f5 100644
--- a/include/asm-h8300/bitops.h
+++ b/include/asm-h8300/bitops.h
@@ -194,6 +194,7 @@ static __inline__ unsigned long __ffs(unsigned long word)
#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/ext2-non-atomic.h>
#include <asm-generic/bitops/ext2-atomic.h>
#include <asm-generic/bitops/minix.h>
diff --git a/include/asm-i386/bitops.h b/include/asm-i386/bitops.h
index a20fe98..1079ba8 100644
--- a/include/asm-i386/bitops.h
+++ b/include/asm-i386/bitops.h
@@ -58,6 +58,8 @@ static inline void __set_bit(int nr, volatile unsigned long * addr)
:"Ir" (nr));
}
+#define __long_set_bit(nr,addr) __set_bit((nr), (addr))
+
/**
* clear_bit - Clears a bit in memory
* @nr: Bit to clear
@@ -83,6 +85,9 @@ static inline void __clear_bit(int nr, volatile unsigned long * addr)
:"+m" (ADDR)
:"Ir" (nr));
}
+
+#define __long_clear_bit(nr,addr) __clear_bit((nr), (addr))
+
#define smp_mb__before_clear_bit() barrier()
#define smp_mb__after_clear_bit() barrier()
@@ -103,6 +108,8 @@ static inline void __change_bit(int nr, volatile unsigned long * addr)
:"Ir" (nr));
}
+#define __long_change_bit(nr,addr) __change_bit((nr), (addr))
+
/**
* change_bit - Toggle a bit in memory
* @nr: Bit to change
@@ -161,6 +168,8 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long * addr)
return oldbit;
}
+#define __long_test_and_set_bit(nr,addr) __test_and_set_bit((nr), (addr))
+
/**
* test_and_clear_bit - Clear a bit and return its old value
* @nr: Bit to clear
@@ -201,6 +210,8 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
return oldbit;
}
+#define __long_test_and_clear_bit(nr,addr) __test_and_clear_bit((nr), (addr))
+
/* WARNING: non atomic and it can be reordered! */
static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
{
@@ -213,6 +224,8 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
return oldbit;
}
+#define __long_test_and_change_bit(nr,addr) __test_and_change_bit((nr), (addr))
+
/**
* test_and_change_bit - Change a bit and return its old value
* @nr: Bit to change
@@ -262,6 +275,10 @@ static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
constant_test_bit((nr),(addr)) : \
variable_test_bit((nr),(addr)))
+#define long_test_bit(nr,addr) test_bit((nr), (addr))
+
+#include <asm-generic/bitops/atomic-long.h>
+
#undef ADDR
/**
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 6cc517e..7c97528 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -279,6 +279,8 @@ test_bit (int nr, const volatile void *addr)
return 1 & (((const volatile __u32 *) addr)[nr >> 5] >> (nr & 31));
}
+#include <asm-generic/bitops/atomic-long.h>
+
/**
* ffz - find the first zero bit in a long word
* @x: The long word to find the bit in
diff --git a/include/asm-m32r/bitops.h b/include/asm-m32r/bitops.h
index 66ab672..20ecc60 100644
--- a/include/asm-m32r/bitops.h
+++ b/include/asm-m32r/bitops.h
@@ -243,7 +243,9 @@ static __inline__ int test_and_change_bit(int nr, volatile void * addr)
return (oldbit != 0);
}
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
#include <asm-generic/bitops/ffz.h>
#include <asm-generic/bitops/__ffs.h>
#include <asm-generic/bitops/fls.h>
diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h
index 1a61fdb..86d67ba 100644
--- a/include/asm-m68k/bitops.h
+++ b/include/asm-m68k/bitops.h
@@ -172,6 +172,8 @@ static inline int test_bit(int nr, const unsigned long *vaddr)
return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0;
}
+#include <asm-generic/bitops/atomic-long.h>
+
static inline int find_first_zero_bit(const unsigned long *vaddr,
unsigned size)
{
diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h
index 7d6075d..d8f9a20 100644
--- a/include/asm-m68knommu/bitops.h
+++ b/include/asm-m68knommu/bitops.h
@@ -158,6 +158,7 @@ static __inline__ int __test_bit(int nr, const volatile unsigned long * addr)
__constant_test_bit((nr),(addr)) : \
__test_bit((nr),(addr)))
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index 148bc79..210fef4 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -51,16 +51,16 @@
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
+static inline void long_set_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned short bit = nr & SZLONG_MASK;
+ unsigned long *m = (unsigned long *) addr;
+ unsigned short bit = nr;
unsigned long temp;
if (cpu_has_llsc && R10000_LLSC_WAR) {
__asm__ __volatile__(
" .set mips3 \n"
- "1: " __LL "%0, %1 # set_bit \n"
+ "1: " __LL "%0, %1 # long_set_bit \n"
" or %0, %2 \n"
" " __SC "%0, %1 \n"
" beqzl %0, 1b \n"
@@ -70,7 +70,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
#ifdef CONFIG_CPU_MIPSR2
} else if (__builtin_constant_p(bit)) {
__asm__ __volatile__(
- "1: " __LL "%0, %1 # set_bit \n"
+ "1: " __LL "%0, %1 # long_set_bit \n"
" " __INS "%0, %4, %2, 1 \n"
" " __SC "%0, %1 \n"
" beqz %0, 2f \n"
@@ -83,7 +83,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
} else if (cpu_has_llsc) {
__asm__ __volatile__(
" .set mips3 \n"
- "1: " __LL "%0, %1 # set_bit \n"
+ "1: " __LL "%0, %1 # long_set_bit \n"
" or %0, %2 \n"
" " __SC "%0, %1 \n"
" beqz %0, 2f \n"
@@ -98,7 +98,6 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
unsigned long mask;
unsigned long flags;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
*a |= mask;
@@ -106,6 +105,15 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
}
}
+static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
+{
+ unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned short bit = nr & SZLONG_MASK;
+
+ long_set_bit(bit, m);
+}
+
+
/*
* clear_bit - Clears a bit in memory
* @nr: Bit to clear
@@ -116,16 +124,16 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
* you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
* in order to ensure changes are visible on other processors.
*/
-static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
+static inline void long_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
- unsigned short bit = nr & SZLONG_MASK;
+ unsigned long *m = (unsigned long *) addr;
+ unsigned short bit = nr;
unsigned long temp;
if (cpu_has_llsc && R10000_LLSC_WAR) {
__asm__ __volatile__(
" .set mips3 \n"
- "1: " __LL "%0, %1 # clear_bit \n"
+ "1: " __LL "%0, %1 # long_clear_bit\n"
" and %0, %2 \n"
" " __SC "%0, %1 \n"
" beqzl %0, 1b \n"
@@ -135,7 +143,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
#ifdef CONFIG_CPU_MIPSR2
} else if (__builtin_constant_p(bit)) {
__asm__ __volatile__(
- "1: " __LL "%0, %1 # clear_bit \n"
+ "1: " __LL "%0, %1 # long_clear_bit\n"
" " __INS "%0, $0, %2, 1 \n"
" " __SC "%0, %1 \n"
" beqz %0, 2f \n"
@@ -148,7 +156,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
} else if (cpu_has_llsc) {
__asm__ __volatile__(
" .set mips3 \n"
- "1: " __LL "%0, %1 # clear_bit \n"
+ "1: " __LL "%0, %1 # long_clear_bit\n"
" and %0, %2 \n"
" " __SC "%0, %1 \n"
" beqz %0, 2f \n"
@@ -163,7 +171,6 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
unsigned long mask;
unsigned long flags;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
*a &= ~mask;
@@ -171,6 +178,14 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
}
}
+static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
+{
+ unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned short bit = nr & SZLONG_MASK;
+
+ long_clear_bit(bit, m);
+}
+
/*
* change_bit - Toggle a bit in memory
* @nr: Bit to change
@@ -180,37 +195,38 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
+static inline void long_change_bit(unsigned long nr,
+ volatile unsigned long *addr)
{
- unsigned short bit = nr & SZLONG_MASK;
+ unsigned short bit = nr;
if (cpu_has_llsc && R10000_LLSC_WAR) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned long *m = (unsigned long *) addr;
unsigned long temp;
__asm__ __volatile__(
- " .set mips3 \n"
- "1: " __LL "%0, %1 # change_bit \n"
- " xor %0, %2 \n"
- " " __SC "%0, %1 \n"
- " beqzl %0, 1b \n"
- " .set mips0 \n"
+ " .set mips3 \n"
+ "1: " __LL "%0, %1 # long_change_bit \n"
+ " xor %0, %2 \n"
+ " " __SC "%0, %1 \n"
+ " beqzl %0, 1b \n"
+ " .set mips0 \n"
: "=&r" (temp), "=m" (*m)
: "ir" (1UL << bit), "m" (*m));
} else if (cpu_has_llsc) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned long *m = (unsigned long *) addr;
unsigned long temp;
__asm__ __volatile__(
- " .set mips3 \n"
- "1: " __LL "%0, %1 # change_bit \n"
- " xor %0, %2 \n"
- " " __SC "%0, %1 \n"
- " beqz %0, 2f \n"
- " .subsection 2 \n"
- "2: b 1b \n"
- " .previous \n"
- " .set mips0 \n"
+ " .set mips3 \n"
+ "1: " __LL "%0, %1 # long_change_bit \n"
+ " xor %0, %2 \n"
+ " " __SC "%0, %1 \n"
+ " beqz %0, 2f \n"
+ " .subsection 2 \n"
+ "2: b 1b \n"
+ " .previous \n"
+ " .set mips0 \n"
: "=&r" (temp), "=m" (*m)
: "ir" (1UL << bit), "m" (*m));
} else {
@@ -218,7 +234,6 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
unsigned long mask;
unsigned long flags;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
*a ^= mask;
@@ -226,6 +241,15 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
}
}
+static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
+{
+ unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned short bit = nr & SZLONG_MASK;
+
+ long_change_bit(bit, m);
+}
+
+
/*
* test_and_set_bit - Set a bit and return its old value
* @nr: Bit to set
@@ -234,19 +258,19 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_set_bit(unsigned long nr,
+static inline int long_test_and_set_bit(unsigned long nr,
volatile unsigned long *addr)
{
- unsigned short bit = nr & SZLONG_MASK;
+ unsigned short bit = nr;
unsigned long res;
if (cpu_has_llsc && R10000_LLSC_WAR) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned long *m = (unsigned long *) addr;
unsigned long temp;
__asm__ __volatile__(
" .set mips3 \n"
- "1: " __LL "%0, %1 # test_and_set_bit \n"
+ "1: " __LL "%0, %1 # long_test_and_set_bit \n"
" or %2, %0, %3 \n"
" " __SC "%2, %1 \n"
" beqzl %2, 1b \n"
@@ -256,14 +280,14 @@ static inline int test_and_set_bit(unsigned long nr,
: "r" (1UL << bit), "m" (*m)
: "memory");
} else if (cpu_has_llsc) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned long *m = (unsigned long *) addr;
unsigned long temp;
__asm__ __volatile__(
" .set push \n"
" .set noreorder \n"
" .set mips3 \n"
- "1: " __LL "%0, %1 # test_and_set_bit \n"
+ "1: " __LL "%0, %1 # long_test_and_set_bit \n"
" or %2, %0, %3 \n"
" " __SC "%2, %1 \n"
" beqz %2, 2f \n"
@@ -281,7 +305,6 @@ static inline int test_and_set_bit(unsigned long nr,
unsigned long mask;
unsigned long flags;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
res = (mask & *a);
@@ -294,6 +317,15 @@ static inline int test_and_set_bit(unsigned long nr,
return res != 0;
}
+static inline int test_and_set_bit(unsigned long nr,
+ volatile unsigned long *addr)
+{
+ unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned short bit = nr & SZLONG_MASK;
+
+ return long_test_and_set_bit(bit, m);
+}
+
/*
* test_and_clear_bit - Clear a bit and return its old value
* @nr: Bit to clear
@@ -302,19 +334,19 @@ static inline int test_and_set_bit(unsigned long nr,
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_clear_bit(unsigned long nr,
+static inline int long_test_and_clear_bit(unsigned long nr,
volatile unsigned long *addr)
{
- unsigned short bit = nr & SZLONG_MASK;
+ unsigned short bit = nr;
unsigned long res;
if (cpu_has_llsc && R10000_LLSC_WAR) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned long *m = (unsigned long *) addr;
unsigned long temp;
__asm__ __volatile__(
" .set mips3 \n"
- "1: " __LL "%0, %1 # test_and_clear_bit \n"
+ "1: " __LL "%0, %1 # long_test_and_clear_bit\n"
" or %2, %0, %3 \n"
" xor %2, %3 \n"
" " __SC "%2, %1 \n"
@@ -326,11 +358,11 @@ static inline int test_and_clear_bit(unsigned long nr,
: "memory");
#ifdef CONFIG_CPU_MIPSR2
} else if (__builtin_constant_p(nr)) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned long *m = (unsigned long *) addr;
unsigned long temp;
__asm__ __volatile__(
- "1: " __LL "%0, %1 # test_and_clear_bit \n"
+ "1: " __LL "%0, %1 # long_test_and_clear_bit\n"
" " __EXT "%2, %0, %3, 1 \n"
" " __INS "%0, $0, %3, 1 \n"
" " __SC "%0, %1 \n"
@@ -343,14 +375,14 @@ static inline int test_and_clear_bit(unsigned long nr,
: "memory");
#endif
} else if (cpu_has_llsc) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned long *m = (unsigned long *) addr;
unsigned long temp;
__asm__ __volatile__(
" .set push \n"
" .set noreorder \n"
" .set mips3 \n"
- "1: " __LL "%0, %1 # test_and_clear_bit \n"
+ "1: " __LL "%0, %1 # long_test_and_clear_bit\n"
" or %2, %0, %3 \n"
" xor %2, %3 \n"
" " __SC "%2, %1 \n"
@@ -369,7 +401,6 @@ static inline int test_and_clear_bit(unsigned long nr,
unsigned long mask;
unsigned long flags;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
res = (mask & *a);
@@ -382,6 +413,15 @@ static inline int test_and_clear_bit(unsigned long nr,
return res != 0;
}
+static inline int test_and_clear_bit(unsigned long nr,
+ volatile unsigned long *addr)
+{
+ unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned short bit = nr & SZLONG_MASK;
+
+ return long_test_and_clear_bit(bit, m);
+}
+
/*
* test_and_change_bit - Change a bit and return its old value
* @nr: Bit to change
@@ -390,19 +430,19 @@ static inline int test_and_clear_bit(unsigned long nr,
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_change_bit(unsigned long nr,
+static inline int long_test_and_change_bit(unsigned long nr,
volatile unsigned long *addr)
{
- unsigned short bit = nr & SZLONG_MASK;
+ unsigned short bit = nr;
unsigned long res;
if (cpu_has_llsc && R10000_LLSC_WAR) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned long *m = (unsigned long *) addr;
unsigned long temp;
__asm__ __volatile__(
" .set mips3 \n"
- "1: " __LL "%0, %1 # test_and_change_bit \n"
+ "1: " __LL "%0, %1 # long_test_and_change_bit\n"
" xor %2, %0, %3 \n"
" " __SC "%2, %1 \n"
" beqzl %2, 1b \n"
@@ -412,14 +452,14 @@ static inline int test_and_change_bit(unsigned long nr,
: "r" (1UL << bit), "m" (*m)
: "memory");
} else if (cpu_has_llsc) {
- unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned long *m = (unsigned long *) addr;
unsigned long temp;
__asm__ __volatile__(
" .set push \n"
" .set noreorder \n"
" .set mips3 \n"
- "1: " __LL "%0, %1 # test_and_change_bit \n"
+ "1: " __LL "%0, %1 # long_test_and_change_bit\n"
" xor %2, %0, %3 \n"
" " __SC "\t%2, %1 \n"
" beqz %2, 2f \n"
@@ -437,7 +477,6 @@ static inline int test_and_change_bit(unsigned long nr,
unsigned long mask;
unsigned long flags;
- a += nr >> SZLONG_LOG;
mask = 1UL << bit;
raw_local_irq_save(flags);
res = (mask & *a);
@@ -450,7 +489,17 @@ static inline int test_and_change_bit(unsigned long nr,
return res != 0;
}
+static inline int test_and_change_bit(unsigned long nr,
+ volatile unsigned long *addr)
+{
+ unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
+ unsigned short bit = nr & SZLONG_MASK;
+
+ return long_test_and_change_bit(bit, m);
+}
+
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
/*
* Return the bit position (0..63) of the most significant 1 bit in a word
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
index 015cb0d..8a091cd 100644
--- a/include/asm-parisc/bitops.h
+++ b/include/asm-parisc/bitops.h
@@ -108,7 +108,9 @@ static __inline__ int test_and_change_bit(int nr, volatile unsigned long * addr)
return (oldbit & mask) ? 1 : 0;
}
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
#ifdef __KERNEL__
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index 8144a27..032b39e 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -183,7 +183,9 @@ static __inline__ void set_bits(unsigned long mask, unsigned long *addr)
: "cc");
}
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
/*
* Return the zero-based bit position (LE, not IBM bit numbering) of
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index f79c9b7..a52679a 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -435,6 +435,8 @@ __constant_test_bit(unsigned long nr, const volatile unsigned long *addr) {
__constant_test_bit((nr),(addr)) : \
__test_bit((nr),(addr)) )
+#include <asm-generic/bitops/atomic-long.h>
+
/*
* ffz = Find First Zero in word. Undefined if no zero exists,
* so code should check against ~0UL first..
diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h
index 1c16792..7b8c9b7 100644
--- a/include/asm-sh/bitops.h
+++ b/include/asm-sh/bitops.h
@@ -98,7 +98,9 @@ static inline int test_and_change_bit(int nr, volatile void * addr)
return retval;
}
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
static inline unsigned long ffz(unsigned long word)
{
diff --git a/include/asm-sh64/bitops.h b/include/asm-sh64/bitops.h
index f3bdcdb..09c8824 100644
--- a/include/asm-sh64/bitops.h
+++ b/include/asm-sh64/bitops.h
@@ -109,7 +109,9 @@ static __inline__ int test_and_change_bit(int nr, volatile void * addr)
return retval;
}
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
static __inline__ unsigned long ffz(unsigned long word)
{
diff --git a/include/asm-sparc/bitops.h b/include/asm-sparc/bitops.h
index 329e696..1aa4cbd 100644
--- a/include/asm-sparc/bitops.h
+++ b/include/asm-sparc/bitops.h
@@ -84,7 +84,9 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
(void) ___change_bit(ADDR, mask);
}
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
#define smp_mb__before_clear_bit() do { } while(0)
#define smp_mb__after_clear_bit() do { } while(0)
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index 3d5e1af..9eacf61 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -17,7 +17,9 @@ extern void set_bit(unsigned long nr, volatile unsigned long *addr);
extern void clear_bit(unsigned long nr, volatile unsigned long *addr);
extern void change_bit(unsigned long nr, volatile unsigned long *addr);
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
#ifdef CONFIG_SMP
#define smp_mb__before_clear_bit() membar_storeload_loadload()
diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h
index 1fa99ba..0810259 100644
--- a/include/asm-v850/bitops.h
+++ b/include/asm-v850/bitops.h
@@ -138,6 +138,7 @@ static inline int __test_bit (int nr, const void *addr)
#define smp_mb__before_clear_bit() barrier ()
#define smp_mb__after_clear_bit() barrier ()
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/ffs.h>
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/fls64.h>
diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h
index d4dbbe5..1fb5315 100644
--- a/include/asm-x86_64/bitops.h
+++ b/include/asm-x86_64/bitops.h
@@ -254,6 +254,8 @@ static __inline__ int variable_test_bit(int nr, volatile const void * addr)
#undef ADDR
+#include <asm-generic/bitops/atomic-long.h>
+
extern long find_first_zero_bit(const unsigned long * addr, unsigned long size);
extern long find_next_zero_bit (const unsigned long * addr, long size, long offset);
extern long find_first_bit(const unsigned long * addr, unsigned long size);
diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h
index 1c1e0d9..1754bac 100644
--- a/include/asm-xtensa/bitops.h
+++ b/include/asm-xtensa/bitops.h
@@ -27,7 +27,9 @@
#define smp_mb__after_clear_bit() barrier()
#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/atomic-long.h>
#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/non-atomic-long.h>
#if XCHAL_HAVE_NSA
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 23f5514..3147f21 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -91,13 +91,19 @@ extern cpumask_t _unused_cpumask_arg_;
#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
{
- set_bit(cpu, dstp->bits);
+ if (NR_CPUS <= BITS_PER_LONG)
+ long_set_bit(cpu, dstp->bits);
+ else
+ set_bit(cpu, dstp->bits);
}
#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
{
- clear_bit(cpu, dstp->bits);
+ if (NR_CPUS <= BITS_PER_LONG)
+ long_clear_bit(cpu, dstp->bits);
+ else
+ clear_bit(cpu, dstp->bits);
}
#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
@@ -113,12 +119,25 @@ static inline void __cpus_clear(cpumask_t *dstp, int nbits)
}
/* No static inline type checking - see Subtlety (1) above. */
-#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
+#define cpu_isset(cpu, cpumask) \
+({ \
+ int __res; \
+ \
+ if (NR_CPUS <= BITS_PER_LONG) \
+ __res = long_test_bit((cpu), (cpumask).bits); \
+ else \
+ __res = test_bit((cpu), (cpumask).bits); \
+ \
+ __res; \
+})
#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
{
- return test_and_set_bit(cpu, addr->bits);
+ if (NR_CPUS <= BITS_PER_LONG)
+ return long_test_and_set_bit(cpu, addr->bits);
+ else
+ return test_and_set_bit(cpu, addr->bits);
}
#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
next reply other threads:[~2007-09-25 15:52 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-09-25 15:52 Ralf Baechle [this message]
2007-09-28 17:34 ` Optimize cpumask functions for SMPs with < BITS_PER_LONG processors Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070925155200.GA7342@linux-mips.org \
--to=ralf@linux-mips.org \
--cc=linux-arch@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.