* [patch 06/44] generic __{, test_and_}{set, clear, change}_bit() and test_bit()
[not found] <20060201090224.536581000@localhost.localdomain>
@ 2006-02-01 9:02 ` Akinobu Mita
2006-02-01 9:02 ` [patch 10/44] generic fls64() Akinobu Mita
` (3 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Akinobu Mita @ 2006-02-01 9:02 UTC (permalink / raw)
To: linux-kernel
Cc: Akinobu Mita, linux-mips, dev-etrax, linux-ia64, ultralinux,
Ian Molton, Hirokazu Takata, linuxsh-shmedia-dev, linuxppc-dev,
Ivan Kokshaysky, linuxsh-dev, sparclinux, Chris Zankel,
parisc-linux, Russell King, Richard Henderson
This patch introduces the C-language equivalents of the functions below:
void __set_bit(int nr, volatile unsigned long *addr);
void __clear_bit(int nr, volatile unsigned long *addr);
void __change_bit(int nr, volatile unsigned long *addr);
int __test_and_set_bit(int nr, volatile unsigned long *addr);
int __test_and_clear_bit(int nr, volatile unsigned long *addr);
int __test_and_change_bit(int nr, volatile unsigned long *addr);
int test_bit(int nr, const volatile unsigned long *addr);
In include/asm-generic/bitops/non-atomic.h
This code is largely copied from:
asm-powerpc/bitops.h
Signed-off-by: Akinobu Mita <mita@miraclelinux.com>
include/asm-generic/bitops/non-atomic.h | 111 ++++++++++++++++++++++++++++++++
1 files changed, 111 insertions(+)
Index: 2.6-git/include/asm-generic/bitops/non-atomic.h
===================================================================
--- /dev/null
+++ 2.6-git/include/asm-generic/bitops/non-atomic.h
@@ -0,0 +1,111 @@
+#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+
+#include <asm/types.h>
+
+#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+ *p |= mask;
+}
+
+static __inline__ void __clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+ *p &= ~mask;
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __change_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+ *p ^= mask;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old | mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old & ~mask;
+ return (old & mask) != 0;
+}
+
+/* WARNING: non-atomic and can be reordered! */
+static __inline__ int __test_and_change_bit(int nr,
+ volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old ^ mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static __inline__ int test_bit(int nr, __const__ volatile unsigned long *addr)
+{
+ return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
--
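As a quick illustration of how these helpers address a multi-word bitmap, a
minimal hypothetical sketch (bitops_demo() is a made-up name, not part of the
patch):

	static void bitops_demo(void)
	{
		unsigned long map[2] = { 0, 0 };

		/*
		 * On a 32-bit kernel BITOP_WORD(40) == 1 and
		 * BITOP_MASK(40) == 1UL << 8, so bit 40 lands in the
		 * second word of the array.
		 */
		__set_bit(40, map);

		if (test_bit(40, map))		/* reads back 1 */
			__change_bit(40, map);	/* toggles it off again */

		/* The bit is clear again, so the returned old value is 0. */
		__test_and_clear_bit(40, map);
	}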
* [patch 10/44] generic fls64()
[not found] <20060201090224.536581000@localhost.localdomain>
2006-02-01 9:02 ` [patch 06/44] generic __{, test_and_}{set, clear, change}_bit() and test_bit() Akinobu Mita
@ 2006-02-01 9:02 ` Akinobu Mita
2006-02-01 9:02 ` [patch 12/44] generic sched_find_first_bit() Akinobu Mita
` (2 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Akinobu Mita @ 2006-02-01 9:02 UTC (permalink / raw)
To: linux-kernel
Cc: linux-mips, linux-ia64, Ian Molton, David Howells, linuxppc-dev,
Greg Ungerer, sparclinux, Miles Bader, Linus Torvalds,
Yoshinori Sato, Hirokazu Takata, linuxsh-shmedia-dev, linux-m68k,
Ivan Kokshaysky, Richard Henderson, Akinobu Mita, Chris Zankel,
dev-etrax, ultralinux, Andi Kleen, linuxsh-dev, linux390,
Russell King, parisc-linux
This patch introduces the C-language equivalent of the function:
int fls64(__u64 x);
In include/asm-generic/bitops/fls64.h
This code is largely copied from:
include/linux/bitops.h
Signed-off-by: Akinobu Mita <mita@miraclelinux.com>
include/asm-generic/bitops/fls64.h | 12 ++++++++++++
1 files changed, 12 insertions(+)
Index: 2.6-git/include/asm-generic/bitops/fls64.h
===================================================================
--- /dev/null
+++ 2.6-git/include/asm-generic/bitops/fls64.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_GENERIC_BITOPS_FLS64_H_
+#define _ASM_GENERIC_BITOPS_FLS64_H_
+
+static inline int fls64(__u64 x)
+{
+ __u32 h = x >> 32;
+ if (h)
+ return fls(x) + 32;
+ return fls(x);
+}
+
+#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */
--
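For reference, a few expected fls64() results, assuming the usual kernel
convention that fls() returns the 1-based index of the highest set bit and 0
for a zero argument (illustration only):

	/*
	 * fls64(0)             == 0
	 * fls64(1)             == 1
	 * fls64(0x80000000ULL) == 32
	 * fls64(1ULL << 40)    == 41
	 */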
* [patch 12/44] generic sched_find_first_bit()
[not found] <20060201090224.536581000@localhost.localdomain>
2006-02-01 9:02 ` [patch 06/44] generic __{, test_and_}{set, clear, change}_bit() and test_bit() Akinobu Mita
2006-02-01 9:02 ` [patch 10/44] generic fls64() Akinobu Mita
@ 2006-02-01 9:02 ` Akinobu Mita
2006-02-03 3:58 ` [parisc-linux] " Grant Grundler
2006-02-01 9:02 ` [patch 14/44] generic hweight{64,32,16,8}() Akinobu Mita
2006-02-01 9:02 ` [patch 31/44] powerpc: use generic bitops Akinobu Mita
4 siblings, 1 reply; 14+ messages in thread
From: Akinobu Mita @ 2006-02-01 9:02 UTC (permalink / raw)
To: linux-kernel
Cc: linux-mips, linux-ia64, Ian Molton, David Howells, linuxppc-dev,
Greg Ungerer, sparclinux, Miles Bader, Linus Torvalds,
Yoshinori Sato, Hirokazu Takata, linuxsh-dev, linux-m68k,
Akinobu Mita, Chris Zankel, dev-etrax, ultralinux, Andi Kleen,
linuxsh-shmedia-dev, linux390, Russell King, parisc-linux
This patch introduces the C-language equivalent of the function:
int sched_find_first_bit(const unsigned long *b);
In include/asm-generic/bitops/sched.h
This code is largely copied from:
include/asm-powerpc/bitops.h
Signed-off-by: Akinobu Mita <mita@miraclelinux.com>
include/asm-generic/bitops/sched.h | 36 ++++++++++++++++++++++++++++++++++++
1 files changed, 36 insertions(+)
Index: 2.6-git/include/asm-generic/bitops/sched.h
===================================================================
--- /dev/null
+++ 2.6-git/include/asm-generic/bitops/sched.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_GENERIC_BITOPS_SCHED_H_
+#define _ASM_GENERIC_BITOPS_SCHED_H_
+
+#include <linux/compiler.h> /* unlikely() */
+#include <asm/types.h>
+
+/*
+ * Every architecture must define this function. It's the fastest
+ * way of searching a 140-bit bitmap where the first 100 bits are
+ * unlikely to be set. It's guaranteed that at least one of the 140
+ * bits is set.
+ */
+static inline int sched_find_first_bit(const unsigned long *b)
+{
+#if BITS_PER_LONG == 64
+ if (unlikely(b[0]))
+ return __ffs(b[0]);
+ if (unlikely(b[1]))
+ return __ffs(b[1]) + 64;
+ return __ffs(b[2]) + 128;
+#elif BITS_PER_LONG == 32
+ if (unlikely(b[0]))
+ return __ffs(b[0]);
+ if (unlikely(b[1]))
+ return __ffs(b[1]) + 32;
+ if (unlikely(b[2]))
+ return __ffs(b[2]) + 64;
+ if (b[3])
+ return __ffs(b[3]) + 96;
+ return __ffs(b[4]) + 128;
+#else
+#error BITS_PER_LONG not defined
+#endif
+}
+
+#endif /* _ASM_GENERIC_BITOPS_SCHED_H_ */
--
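For illustration, the intended caller keeps a 140-bit priority bitmap, as in
the O(1) scheduler; a minimal hypothetical sketch (prio_bitmap_demo() is a
made-up name, not part of the patch):

	static void prio_bitmap_demo(void)
	{
		DECLARE_BITMAP(prio_map, 140);	/* 3 words on 64-bit, 5 on 32-bit */

		bitmap_zero(prio_map, 140);
		__set_bit(110, prio_map);

		/* Bit 110 is the lowest (and only) set bit, so this returns 110. */
		BUG_ON(sched_find_first_bit(prio_map) != 110);
	}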
* [patch 14/44] generic hweight{64,32,16,8}()
[not found] <20060201090224.536581000@localhost.localdomain>
` (2 preceding siblings ...)
2006-02-01 9:02 ` [patch 12/44] generic sched_find_first_bit() Akinobu Mita
@ 2006-02-01 9:02 ` Akinobu Mita
2006-02-01 9:06 ` Andi Kleen
` (2 more replies)
2006-02-01 9:02 ` [patch 31/44] powerpc: use generic bitops Akinobu Mita
4 siblings, 3 replies; 14+ messages in thread
From: Akinobu Mita @ 2006-02-01 9:02 UTC (permalink / raw)
To: linux-kernel
Cc: linux-mips, linux-ia64, Ian Molton, David Howells, linuxppc-dev,
Greg Ungerer, sparclinux, Miles Bader, Linus Torvalds,
Yoshinori Sato, Hirokazu Takata, linuxsh-shmedia-dev, linux-m68k,
Ivan Kokshaysky, Richard Henderson, Akinobu Mita, Chris Zankel,
dev-etrax, ultralinux, Andi Kleen, linuxsh-dev, linux390,
Russell King, parisc-linux
This patch introduces the C-language equivalents of the functions below:
unsigned int hweight32(unsigned int w);
unsigned int hweight16(unsigned int w);
unsigned int hweight8(unsigned int w);
unsigned long hweight64(__u64 w);
In include/asm-generic/bitops/hweight.h
This code is largely copied from:
include/linux/bitops.h
Signed-off-by: Akinobu Mita <mita@miraclelinux.com>
include/asm-generic/bitops/hweight.h | 54 +++++++++++++++++++++++++++++++++++
1 files changed, 54 insertions(+)
Index: 2.6-git/include/asm-generic/bitops/hweight.h
===================================================================
--- /dev/null
+++ 2.6-git/include/asm-generic/bitops/hweight.h
@@ -0,0 +1,54 @@
+#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_HWEIGHT_H_
+
+#include <asm/types.h>
+
+/**
+ * hweightN - returns the Hamming weight of an N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+static inline unsigned int hweight32(unsigned int w)
+{
+ unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
+ res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
+ return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+}
+
+static inline unsigned int hweight16(unsigned int w)
+{
+ unsigned int res = (w & 0x5555) + ((w >> 1) & 0x5555);
+ res = (res & 0x3333) + ((res >> 2) & 0x3333);
+ res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F);
+ return (res & 0x00FF) + ((res >> 8) & 0x00FF);
+}
+
+static inline unsigned int hweight8(unsigned int w)
+{
+ unsigned int res = (w & 0x55) + ((w >> 1) & 0x55);
+ res = (res & 0x33) + ((res >> 2) & 0x33);
+ return (res & 0x0F) + ((res >> 4) & 0x0F);
+}
+
+static inline unsigned long hweight64(__u64 w)
+{
+#if BITS_PER_LONG == 32
+ return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
+#elif BITS_PER_LONG == 64
+ u64 res;
+ res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
+ res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
+ res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
+ res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
+ res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
+ return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
+#else
+#error BITS_PER_LONG not defined
+#endif
+}
+
+#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
--
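To make the divide-and-conquer steps concrete, here is hweight8() from the
patch traced on w = 0xA5 (binary 10100101, four bits set); a worked example,
not part of the patch itself:

	/*
	 * Worked example: hweight8(0xA5)
	 *
	 *   res = (0xA5 & 0x55) + ((0xA5 >> 1) & 0x55) = 0x05 + 0x50 = 0x55
	 *         each 2-bit pair now holds the count of its two bits
	 *   res = (0x55 & 0x33) + ((0x55 >> 2) & 0x33) = 0x11 + 0x11 = 0x22
	 *         each nibble now holds the count of its four bits
	 *   ret = (0x22 & 0x0F) + ((0x22 >> 4) & 0x0F) = 0x02 + 0x02 = 4
	 */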
* [patch 31/44] powerpc: use generic bitops
[not found] <20060201090224.536581000@localhost.localdomain>
` (3 preceding siblings ...)
2006-02-01 9:02 ` [patch 14/44] generic hweight{64,32,16,8}() Akinobu Mita
@ 2006-02-01 9:02 ` Akinobu Mita
4 siblings, 0 replies; 14+ messages in thread
From: Akinobu Mita @ 2006-02-01 9:02 UTC (permalink / raw)
To: linux-kernel; +Cc: Akinobu Mita, linuxppc-dev
- remove __{,test_and_}{set,clear,change}_bit() and test_bit()
- remove generic_fls64()
- remove generic_hweight{64,32,16,8}()
- remove sched_find_first_bit()
Signed-off-by: Akinobu Mita <mita@miraclelinux.com>
include/asm-powerpc/bitops.h | 105 +------------------------------------------
1 files changed, 4 insertions(+), 101 deletions(-)
Index: 2.6-git/include/asm-powerpc/bitops.h
===================================================================
--- 2.6-git.orig/include/asm-powerpc/bitops.h
+++ 2.6-git/include/asm-powerpc/bitops.h
@@ -184,72 +184,7 @@ static __inline__ void set_bits(unsigned
: "cc");
}
-/* Non-atomic versions */
-static __inline__ int test_bit(unsigned long nr,
- __const__ volatile unsigned long *addr)
-{
- return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
-}
-
-static __inline__ void __set_bit(unsigned long nr,
- volatile unsigned long *addr)
-{
- unsigned long mask = BITOP_MASK(nr);
- unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-
- *p |= mask;
-}
-
-static __inline__ void __clear_bit(unsigned long nr,
- volatile unsigned long *addr)
-{
- unsigned long mask = BITOP_MASK(nr);
- unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-
- *p &= ~mask;
-}
-
-static __inline__ void __change_bit(unsigned long nr,
- volatile unsigned long *addr)
-{
- unsigned long mask = BITOP_MASK(nr);
- unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-
- *p ^= mask;
-}
-
-static __inline__ int __test_and_set_bit(unsigned long nr,
- volatile unsigned long *addr)
-{
- unsigned long mask = BITOP_MASK(nr);
- unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
- unsigned long old = *p;
-
- *p = old | mask;
- return (old & mask) != 0;
-}
-
-static __inline__ int __test_and_clear_bit(unsigned long nr,
- volatile unsigned long *addr)
-{
- unsigned long mask = BITOP_MASK(nr);
- unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
- unsigned long old = *p;
-
- *p = old & ~mask;
- return (old & mask) != 0;
-}
-
-static __inline__ int __test_and_change_bit(unsigned long nr,
- volatile unsigned long *addr)
-{
- unsigned long mask = BITOP_MASK(nr);
- unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
- unsigned long old = *p;
-
- *p = old ^ mask;
- return (old & mask) != 0;
-}
+#include <asm-generic/bitops/non-atomic.h>
/*
* Return the zero-based bit position (LE, not IBM bit numbering) of
@@ -310,16 +245,9 @@ static __inline__ int fls(unsigned int x
asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
return 32 - lz;
}
-#define fls64(x) generic_fls64(x)
+#include <asm-generic/bitops/fls64.h>
-/*
- * hweightN: returns the hamming weight (i.e. the number
- * of bits set) of a N-bit word
- */
-#define hweight64(x) generic_hweight64(x)
-#define hweight32(x) generic_hweight32(x)
-#define hweight16(x) generic_hweight16(x)
-#define hweight8(x) generic_hweight8(x)
+#include <asm-generic/bitops/hweight.h>
#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
unsigned long find_next_zero_bit(const unsigned long *addr,
@@ -397,32 +325,7 @@ unsigned long find_next_zero_le_bit(cons
#define minix_find_first_zero_bit(addr,size) \
find_first_zero_le_bit((unsigned long *)addr, size)
-/*
- * Every architecture must define this function. It's the fastest
- * way of searching a 140-bit bitmap where the first 100 bits are
- * unlikely to be set. It's guaranteed that at least one of the 140
- * bits is cleared.
- */
-static inline int sched_find_first_bit(const unsigned long *b)
-{
-#ifdef CONFIG_PPC64
- if (unlikely(b[0]))
- return __ffs(b[0]);
- if (unlikely(b[1]))
- return __ffs(b[1]) + 64;
- return __ffs(b[2]) + 128;
-#else
- if (unlikely(b[0]))
- return __ffs(b[0]);
- if (unlikely(b[1]))
- return __ffs(b[1]) + 32;
- if (unlikely(b[2]))
- return __ffs(b[2]) + 64;
- if (b[3])
- return __ffs(b[3]) + 96;
- return __ffs(b[4]) + 128;
-#endif
-}
+#include <asm-generic/bitops/sched.h>
#endif /* __KERNEL__ */
--
* Re: [patch 14/44] generic hweight{64,32,16,8}()
2006-02-01 9:02 ` [patch 14/44] generic hweight{64,32,16,8}() Akinobu Mita
@ 2006-02-01 9:06 ` Andi Kleen
2006-02-01 9:26 ` Michael Tokarev
2006-02-02 1:26 ` Gabriel Paubert
2006-02-03 8:31 ` Ulrich Eckhardt
2 siblings, 1 reply; 14+ messages in thread
From: Andi Kleen @ 2006-02-01 9:06 UTC (permalink / raw)
To: Akinobu Mita
Cc: linux-mips, linux-ia64, Ian Molton, David Howells, linuxppc-dev,
Greg Ungerer, sparclinux, Miles Bader, Yoshinori Sato,
Hirokazu Takata, linuxsh-dev, Linus Torvalds, Ivan Kokshaysky,
Richard Henderson, Chris Zankel, dev-etrax, ultralinux,
linux-m68k, linux-kernel, linuxsh-shmedia-dev, linux390,
Russell King, parisc-linux
On Wednesday 01 February 2006 10:02, Akinobu Mita wrote:
> +static inline unsigned int hweight32(unsigned int w)
> +{
> + unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
> + res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
> + res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
> + res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
> + return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
> +}
How large are these functions on x86? Maybe it would be better to not inline them,
but put it into some C file out of line.
-Andi
* Re: [patch 14/44] generic hweight{64,32,16,8}()
2006-02-01 9:06 ` Andi Kleen
@ 2006-02-01 9:26 ` Michael Tokarev
2006-02-01 10:24 ` Andi Kleen
0 siblings, 1 reply; 14+ messages in thread
From: Michael Tokarev @ 2006-02-01 9:26 UTC (permalink / raw)
To: Andi Kleen
Cc: linux-mips, linux-ia64, Ian Molton, David Howells, linuxppc-dev,
Greg Ungerer, sparclinux, Miles Bader, Yoshinori Sato,
Hirokazu Takata, linuxsh-dev, Linus Torvalds, Ivan Kokshaysky,
Richard Henderson, Akinobu Mita, Chris Zankel, dev-etrax,
ultralinux, linux-m68k, linux-kernel, linuxsh-shmedia-dev,
linux390, Russell King, parisc-linux
Andi Kleen wrote:
> On Wednesday 01 February 2006 10:02, Akinobu Mita wrote:
>
>>+static inline unsigned int hweight32(unsigned int w)
[]
> How large are these functions on x86? Maybe it would be better to not inline them,
> but put it into some C file out of line.
hweight8 47 bytes
hweight16 76 bytes
hweight32 97 bytes
hweight64 56 bytes (NOT inlining hweight32)
hweight64 197 bytes (inlining hweight32)
Those are when compiled as separate non-inlined functions,
with pushl %ebp and ret.
/mjt
* Re: [patch 14/44] generic hweight{64,32,16,8}()
2006-02-01 9:26 ` Michael Tokarev
@ 2006-02-01 10:24 ` Andi Kleen
2006-02-02 12:50 ` Akinobu Mita
0 siblings, 1 reply; 14+ messages in thread
From: Andi Kleen @ 2006-02-01 10:24 UTC (permalink / raw)
To: Michael Tokarev
Cc: linux-mips, linux-ia64, Ian Molton, David Howells, linuxppc-dev,
Greg Ungerer, sparclinux, Miles Bader, Yoshinori Sato,
Hirokazu Takata, linuxsh-dev, Linus Torvalds, Ivan Kokshaysky,
Richard Henderson, Akinobu Mita, Chris Zankel, dev-etrax,
ultralinux, linux-m68k, linux-kernel, linuxsh-shmedia-dev,
linux390, Russell King, parisc-linux
On Wednesday 01 February 2006 10:26, Michael Tokarev wrote:
> Andi Kleen wrote:
> > On Wednesday 01 February 2006 10:02, Akinobu Mita wrote:
> >
> >>+static inline unsigned int hweight32(unsigned int w)
> []
> > How large are these functions on x86? Maybe it would be better to not inline them,
> > but put it into some C file out of line.
>
> hweight8 47 bytes
> hweight16 76 bytes
> hweight32 97 bytes
> hweight64 56 bytes (NOT inlining hweight32)
> hweight64 197 bytes (inlining hweight32)
>
> Those are when compiled as separate non-inlined functions,
> with pushl %ebp and ret.
This would argue for moving them out of line.
-Andi
* Re: [patch 14/44] generic hweight{64,32,16,8}()
2006-02-01 9:02 ` [patch 14/44] generic hweight{64,32,16,8}() Akinobu Mita
2006-02-01 9:06 ` Andi Kleen
@ 2006-02-02 1:26 ` Gabriel Paubert
2006-02-06 11:52 ` Akinobu Mita
2006-02-03 8:31 ` Ulrich Eckhardt
2 siblings, 1 reply; 14+ messages in thread
From: Gabriel Paubert @ 2006-02-02 1:26 UTC (permalink / raw)
To: Akinobu Mita
Cc: linux-mips, linux-m68k, linux-ia64, Ian Molton, Andi Kleen,
David Howells, linuxppc-dev, Greg Ungerer, sparclinux,
Miles Bader, Yoshinori Sato, Hirokazu Takata, linuxsh-dev,
Linus Torvalds, Ivan Kokshaysky, Richard Henderson, Chris Zankel,
dev-etrax, ultralinux, linux-kernel, linuxsh-shmedia-dev,
linux390, Russell King, parisc-linux
On Wed, Feb 01, 2006 at 06:02:38PM +0900, Akinobu Mita wrote:
>
> This patch introduces the C-language equivalents of the functions below:
>
> unsigned int hweight32(unsigned int w);
> unsigned int hweight16(unsigned int w);
> unsigned int hweight8(unsigned int w);
> unsigned long hweight64(__u64 w);
>
> In include/asm-generic/bitops/hweight.h
>
> This code is largely copied from:
> include/linux/bitops.h
>
> Signed-off-by: Akinobu Mita <mita@miraclelinux.com>
> include/asm-generic/bitops/hweight.h | 54 +++++++++++++++++++++++++++++++++++
> 1 files changed, 54 insertions(+)
>
> Index: 2.6-git/include/asm-generic/bitops/hweight.h
> ===================================================================
> --- /dev/null
> +++ 2.6-git/include/asm-generic/bitops/hweight.h
> @@ -0,0 +1,54 @@
> +#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_
> +#define _ASM_GENERIC_BITOPS_HWEIGHT_H_
> +
> +#include <asm/types.h>
> +
> +/**
> + * hweightN - returns the Hamming weight of an N-bit word
> + * @x: the word to weigh
> + *
> + * The Hamming Weight of a number is the total number of bits set in it.
> + */
> +
> +static inline unsigned int hweight32(unsigned int w)
> +{
> + unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
> + res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
> + res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
> + res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
> + return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
> +}
The first step can be implemented slightly better:
unsigned int res = w-((w>>1)&0x55555555);
as I found once on the web[1].
Several of the following steps can also be simplified
by omitting the masking when the result can't possibly
cause a carry to propagate too far.
This might also have a non-negligible impact
on code size.
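For reference, the reason the first step works can be checked on a single
2-bit pair:

	/*
	 * A 2-bit pair holding bits b1 and b0 has the value 2*b1 + b0, and
	 * ((pair >> 1) & 1) is just b1, so
	 *
	 *	pair - ((pair >> 1) & 1) = (2*b1 + b0) - b1 = b1 + b0
	 *
	 * which is exactly the pair's population count.  The 0x55555555 mask
	 * applies this to all sixteen pairs of a 32-bit word at once, and no
	 * borrow ever crosses a pair boundary because each pair's value is
	 * at least b1.
	 */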
> +
> +static inline unsigned int hweight16(unsigned int w)
> +{
> + unsigned int res = (w & 0x5555) + ((w >> 1) & 0x5555);
> + res = (res & 0x3333) + ((res >> 2) & 0x3333);
> + res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F);
> + return (res & 0x00FF) + ((res >> 8) & 0x00FF);
> +}
> +
> +static inline unsigned int hweight8(unsigned int w)
> +{
> + unsigned int res = (w & 0x55) + ((w >> 1) & 0x55);
> + res = (res & 0x33) + ((res >> 2) & 0x33);
> + return (res & 0x0F) + ((res >> 4) & 0x0F);
> +}
> +
> +static inline unsigned long hweight64(__u64 w)
> +{
> +#if BITS_PER_LONG == 32
> + return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
> +#elif BITS_PER_LONG == 64
> + u64 res;
> + res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
> + res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
> + res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
> + res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
> + res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
> + return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
> +#else
> +#error BITS_PER_LONG not defined
> +#endif
> +}
> +
> +#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
>
Regards,
Gabriel
[1] It might be better to write the first line
unsigned res = w - ((w&0xaaaaaaaa)>>1);
but I can never remember what the C standard guarantees about
right-shifting values (very little IIRC). I believe that it will
work properly on all architectures that GCC supports, however,
and that it will help on many.
* Re: [patch 14/44] generic hweight{64,32,16,8}()
2006-02-01 10:24 ` Andi Kleen
@ 2006-02-02 12:50 ` Akinobu Mita
0 siblings, 0 replies; 14+ messages in thread
From: Akinobu Mita @ 2006-02-02 12:50 UTC (permalink / raw)
To: Andi Kleen
Cc: linux-mips, linux-ia64, Ian Molton, Michael Tokarev,
David Howells, linuxppc-dev, Greg Ungerer, sparclinux,
Miles Bader, Yoshinori Sato, Hirokazu Takata, linuxsh-dev,
Linus Torvalds, Ivan Kokshaysky, Richard Henderson, Chris Zankel,
dev-etrax, ultralinux, linux-m68k, linux-kernel,
linuxsh-shmedia-dev, linux390, Russell King, parisc-linux
On Wed, Feb 01, 2006 at 11:24:27AM +0100, Andi Kleen wrote:
> On Wednesday 01 February 2006 10:26, Michael Tokarev wrote:
> > Andi Kleen wrote:
> > > On Wednesday 01 February 2006 10:02, Akinobu Mita wrote:
> > >
> > >>+static inline unsigned int hweight32(unsigned int w)
> > []
> > > How large are these functions on x86? Maybe it would be better to not inline them,
> > > but put it into some C file out of line.
> >
> > hweight8 47 bytes
> > hweight16 76 bytes
> > hweight32 97 bytes
> > hweight64 56 bytes (NOT inlining hweight32)
> > hweight64 197 bytes (inlining hweight32)
> >
> > Those are when compiled as separate non-inlined functions,
> > with pushl %ebp and ret.
>
> This would argue for moving them out of line.
This patch will put hweight*() into lib/hweight.c
Index: 2.6-git/include/asm-generic/bitops/hweight.h
===================================================================
--- 2.6-git.orig/include/asm-generic/bitops/hweight.h
+++ 2.6-git/include/asm-generic/bitops/hweight.h
@@ -1,54 +1,9 @@
#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_
#define _ASM_GENERIC_BITOPS_HWEIGHT_H_
-#include <asm/types.h>
-
-/**
- * hweightN - returns the Hamming weight of an N-bit word
- * @x: the word to weigh
- *
- * The Hamming Weight of a number is the total number of bits set in it.
- */
-
-static inline unsigned int hweight32(unsigned int w)
-{
- unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
- res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
- res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
- res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
- return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
-}
-
-static inline unsigned int hweight16(unsigned int w)
-{
- unsigned int res = (w & 0x5555) + ((w >> 1) & 0x5555);
- res = (res & 0x3333) + ((res >> 2) & 0x3333);
- res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F);
- return (res & 0x00FF) + ((res >> 8) & 0x00FF);
-}
-
-static inline unsigned int hweight8(unsigned int w)
-{
- unsigned int res = (w & 0x55) + ((w >> 1) & 0x55);
- res = (res & 0x33) + ((res >> 2) & 0x33);
- return (res & 0x0F) + ((res >> 4) & 0x0F);
-}
-
-static inline unsigned long hweight64(__u64 w)
-{
-#if BITS_PER_LONG == 32
- return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
-#elif BITS_PER_LONG == 64
- u64 res;
- res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
- res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
- res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
- res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
- res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
- return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
+extern unsigned int hweight32(unsigned int w);
+extern unsigned int hweight16(unsigned int w);
+extern unsigned int hweight8(unsigned int w);
+extern unsigned long hweight64(__u64 w);
#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
Index: 2.6-git/lib/Makefile
===================================================================
--- 2.6-git.orig/lib/Makefile
+++ 2.6-git/lib/Makefile
@@ -5,7 +5,7 @@
lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
- sha1.o
+ sha1.o hweight.o
lib-y += kobject.o kref.o kobject_uevent.o klist.o
Index: 2.6-git/lib/hweight.c
===================================================================
--- /dev/null
+++ 2.6-git/lib/hweight.c
@@ -0,0 +1,54 @@
+#include <linux/module.h>
+#include <asm/types.h>
+
+/**
+ * hweightN - returns the Hamming weight of an N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+unsigned int hweight32(unsigned int w)
+{
+ unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
+ res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
+ return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+}
+EXPORT_SYMBOL(hweight32);
+
+unsigned int hweight16(unsigned int w)
+{
+ unsigned int res = (w & 0x5555) + ((w >> 1) & 0x5555);
+ res = (res & 0x3333) + ((res >> 2) & 0x3333);
+ res = (res & 0x0F0F) + ((res >> 4) & 0x0F0F);
+ return (res & 0x00FF) + ((res >> 8) & 0x00FF);
+}
+EXPORT_SYMBOL(hweight16);
+
+unsigned int hweight8(unsigned int w)
+{
+ unsigned int res = (w & 0x55) + ((w >> 1) & 0x55);
+ res = (res & 0x33) + ((res >> 2) & 0x33);
+ return (res & 0x0F) + ((res >> 4) & 0x0F);
+}
+EXPORT_SYMBOL(hweight8);
+
+unsigned long hweight64(__u64 w)
+{
+#if BITS_PER_LONG == 32
+ return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
+#elif BITS_PER_LONG == 64
+ u64 res;
+ res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
+ res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
+ res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
+ res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
+ res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
+ return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
+#else
+#error BITS_PER_LONG not defined
+#endif
+}
+EXPORT_SYMBOL(hweight64);
* RE: [patch 10/44] generic fls64()
@ 2006-02-02 15:05 Rune Torgersen
0 siblings, 0 replies; 14+ messages in thread
From: Rune Torgersen @ 2006-02-02 15:05 UTC (permalink / raw)
To: Akinobu Mita, linux-kernel
Cc: linux-mips, linux-ia64, Ian Molton, David Howells, linuxppc-dev,
Greg Ungerer, sparclinux, Miles Bader, Linus Torvalds,
Yoshinori Sato, Hirokazu Takata, linuxsh-dev, linux-m68k,
Ivan Kokshaysky, Richard Henderson, Chris Zankel, dev-etrax,
ultralinux, Andi Kleen, linuxsh-shmedia-dev, linux390,
Russell King, parisc-linux
> From: Akinobu Mita
> Sent: Wednesday, February 01, 2006 03:03
> +static inline int fls64(__u64 x)
> +{
> + __u32 h = x >> 32;
> + if (h)
> + return fls(x) + 32;
Shouldn't this be return fls(h) + 32; ??
^^^
> + return fls(x);
> +}
> +
> +#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */
* Re: [parisc-linux] [patch 12/44] generic sched_find_first_bit()
2006-02-01 9:02 ` [patch 12/44] generic sched_find_first_bit() Akinobu Mita
@ 2006-02-03 3:58 ` Grant Grundler
0 siblings, 0 replies; 14+ messages in thread
From: Grant Grundler @ 2006-02-03 3:58 UTC (permalink / raw)
To: Akinobu Mita
Cc: linux-mips, linux-ia64, Ian Molton, Andi Kleen, David Howells,
linuxppc-dev, Greg Ungerer, sparclinux, Miles Bader,
Yoshinori Sato, Hirokazu Takata, linuxsh-shmedia-dev,
Linus Torvalds, Chris Zankel, dev-etrax, ultralinux, linux-m68k,
linux-kernel, linuxsh-dev, linux390, Russell King, parisc-linux
On Wed, Feb 01, 2006 at 06:02:36PM +0900, Akinobu Mita wrote:
> This patch introduces the C-language equivalent of the function:
> int sched_find_first_bit(const unsigned long *b);
Akinobu, would you prefer this slightly cleaner way?
(Not compile tested)
static inline int sched_find_first_bit(const unsigned long *b)
{
if (unlikely(b[0]))
return __ffs(b[0]);
if (unlikely(b[1]))
return __ffs(b[1]) + BITS_PER_LONG;
#if BITS_PER_LONG == 32
if (unlikely(b[2]))
return __ffs(b[2]) + 64;
if (b[3])
return __ffs(b[3]) + 96;
#endif
return __ffs(b[128/BITS_PER_LONG]) + 128;
}
If BITS_PER_LONG isn't defined, the link step will fail and point
at some unknown .o as the offender. But it's the responsibility
of the header file to make sure it's including the BITS_PER_LONG
definition, not the code that calls sched_find_first_bit().
hth,
grant
* Re: [patch 14/44] generic hweight{64,32,16,8}()
2006-02-01 9:02 ` [patch 14/44] generic hweight{64,32,16,8}() Akinobu Mita
2006-02-01 9:06 ` Andi Kleen
2006-02-02 1:26 ` Gabriel Paubert
@ 2006-02-03 8:31 ` Ulrich Eckhardt
2 siblings, 0 replies; 14+ messages in thread
From: Ulrich Eckhardt @ 2006-02-03 8:31 UTC (permalink / raw)
To: Akinobu Mita
Cc: linux-mips, linux-ia64, Ian Molton, Andi Kleen, David Howells,
linuxppc-dev, Greg Ungerer, sparclinux, Miles Bader,
Yoshinori Sato, Hirokazu Takata, linuxsh-dev, Linus Torvalds,
Ivan Kokshaysky, Richard Henderson, Chris Zankel, dev-etrax,
ultralinux, linux-m68k, linux-kernel, linuxsh-shmedia-dev,
linux390, Russell King, parisc-linux
On Wednesday 01 February 2006 10:02, Akinobu Mita wrote:
> unsigned int hweight32(unsigned int w);
> unsigned int hweight16(unsigned int w);
> unsigned int hweight8(unsigned int w);
> unsigned long hweight64(__u64 w);
IMHO, this should use explicitly sized integers like __u8, __u16, etc., unless
there are stringent reasons like better register use - which is hard to tell
for generic C code. Also, why on earth is the return type for hweight64 a
long?
> +static inline unsigned int hweight32(unsigned int w)
> +{
> + unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
> + res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
[...]
Why not use unsigned constants here?
> +static inline unsigned long hweight64(__u64 w)
> +{
[..]
> + u64 res;
> + res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
Why not use initialisation here, too?
just my 2c
Uli
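A sketch of what the prototypes could look like with explicitly sized
arguments and a plain unsigned int return everywhere (hypothetical, only
illustrating the suggestion; no such patch was posted):

	/* Hypothetical prototypes using explicitly sized types (sketch only): */
	unsigned int hweight8(__u8 w);
	unsigned int hweight16(__u16 w);
	unsigned int hweight32(__u32 w);
	unsigned int hweight64(__u64 w);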
* Re: [patch 14/44] generic hweight{64,32,16,8}()
2006-02-02 1:26 ` Gabriel Paubert
@ 2006-02-06 11:52 ` Akinobu Mita
0 siblings, 0 replies; 14+ messages in thread
From: Akinobu Mita @ 2006-02-06 11:52 UTC (permalink / raw)
To: Gabriel Paubert
Cc: linux-mips, linux-m68k, linux-ia64, Ian Molton, Balbir Singh,
Andi Kleen, David Howells, linuxppc-dev, Greg Ungerer, sparclinux,
Miles Bader, Yoshinori Sato, Hirokazu Takata, linuxsh-dev,
Linus Torvalds, Ivan Kokshaysky, linux, Richard Henderson,
Chris Zankel, dev-etrax, ultralinux, linux-kernel,
linuxsh-shmedia-dev, linux390, Russell King, parisc-linux
On Thu, Feb 02, 2006 at 02:26:38AM +0100, Gabriel Paubert wrote:
>
> The first step can be implemented slightly better:
>
> unsigned int res = w-((w>>1)&0x55555555);
>
Yes. I've received a lot of advice about speeding up hweight.
static unsigned int hweight32(unsigned int w)
{
unsigned int res = w - ((w >> 1) & 0x55555555);
res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
res = (res + (res >> 4)) & 0x0F0F0F0F;
res = res + (res >> 8);
return (res + (res >> 16)) & 0x000000FF;
}
static unsigned int hweight16(unsigned int w)
{
unsigned int res = w - ((w >> 1) & 0x5555);
res = (res & 0x3333) + ((res >> 2) & 0x3333);
res = (res + (res >> 4)) & 0x0F0F;
return (res + (res >> 8)) & 0x00FF;
}
static unsigned int hweight8(unsigned int w)
{
unsigned int res = w - ((w >> 1) & 0x55);
res = (res & 0x33) + ((res >> 2) & 0x33);
return (res + (res >> 4)) & 0x0F;
}
static unsigned long hweight64(__u64 w)
{
#if BITS_PER_LONG < 64
return hweight32((unsigned int)(w >> 32)) +
hweight32((unsigned int)w);
#else
__u64 res = w - ((w >> 1) & 0x5555555555555555ul);
res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
res = res + (res >> 8);
res = res + (res >> 16);
return (res + (res >> 32)) & 0x00000000000000FFul;
#endif
}
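A few quick sanity values for the revised helpers (illustration only):

	/*
	 * hweight8(0xFF)        == 8
	 * hweight16(0x0F0F)     == 8
	 * hweight32(0x80000001) == 2
	 * hweight64(~0ULL)      == 64
	 */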