public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] bitops: simplify generic bit finding functions
@ 2008-04-27 20:19 Harvey Harrison
  2008-04-27 20:26 ` Linus Torvalds
  2008-04-28 14:32 ` Alexander van Heukelum
  0 siblings, 2 replies; 27+ messages in thread
From: Harvey Harrison @ 2008-04-27 20:19 UTC (permalink / raw)
  To: Ingo Molnar, Linus Torvalds; +Cc: Andrew Morton, LKML

No need for a sentinal if we explicitly catch the no bits/all bits set
cases, make it clear they are special cases returning size/BITS_PER_LONG.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
---
 include/linux/bitops.h |   93 ++++++++++++++++++++----------------------------
 1 files changed, 39 insertions(+), 54 deletions(-)

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 48bde60..d9eb58a 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -127,18 +127,17 @@ extern unsigned long __find_first_bit(const unsigned long *addr,
 static __always_inline unsigned long
 find_first_bit(const unsigned long *addr, unsigned long size)
 {
-	/* Avoid a function call if the bitmap size is a constant */
-	/* and not bigger than BITS_PER_LONG. */
-
-	/* insert a sentinel so that __ffs returns size if there */
-	/* are no set bits in the bitmap */
-	if (__builtin_constant_p(size) && (size < BITS_PER_LONG))
-		return __ffs((*addr) | (1ul << size));
-
-	/* the result of __ffs(0) is undefined, so it needs to be */
-	/* handled separately */
-	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
-		return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr);
+	/*
+	 * Avoid a function call if the bitmap size is a constant and not
+	 * bigger than BITS_PER_LONG.  Ensure we return size if there are
+	 * no set bits.
+	 */
+	if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) {
+		if (*addr == 0)
+			return (size < BITS_PER_LONG) ? size : BITS_PER_LONG;
+		else
+			return __ffs(*addr);
+	}
 
 	/* size is not constant or too big */
 	return __find_first_bit(addr, size);
@@ -157,20 +156,17 @@ extern unsigned long __find_first_zero_bit(const unsigned long *addr,
 static __always_inline unsigned long
 find_first_zero_bit(const unsigned long *addr, unsigned long size)
 {
-	/* Avoid a function call if the bitmap size is a constant */
-	/* and not bigger than BITS_PER_LONG. */
-
-	/* insert a sentinel so that __ffs returns size if there */
-	/* are no set bits in the bitmap */
-	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
-		return __ffs(~(*addr) | (1ul << size));
+	/*
+	 * Avoid a function call if the bitmap size is a constant and not
+	 * bigger than BITS_PER_LONG.  Ensure we return size if all bits set.
+	 */
+	if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) {
+		if ((~(*addr)) == 0)
+			return (size < BITS_PER_LONG) ? size : BITS_PER_LONG;
+		else
+			return __ffs(~(*addr));
 	}
 
-	/* the result of __ffs(0) is undefined, so it needs to be */
-	/* handled separately */
-	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
-		return (~(*addr) == 0) ? BITS_PER_LONG : __ffs(~(*addr));
-
 	/* size is not constant or too big */
 	return __find_first_zero_bit(addr, size);
 }
@@ -192,22 +188,17 @@ find_next_bit(const unsigned long *addr, unsigned long size,
 {
 	unsigned long value;
 
-	/* Avoid a function call if the bitmap size is a constant */
-	/* and not bigger than BITS_PER_LONG. */
-
-	/* insert a sentinel so that __ffs returns size if there */
-	/* are no set bits in the bitmap */
-	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
+	/*
+	 * Avoid a function call if the bitmap size is a constant and not
+	 * bigger than BITS_PER_LONG.  Ensure we return size if there are
+	 * no set bits.
+	 */
+	if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) {
 		value = (*addr) & ((~0ul) << offset);
-		value |= (1ul << size);
-		return __ffs(value);
-	}
-
-	/* the result of __ffs(0) is undefined, so it needs to be */
-	/* handled separately */
-	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
-		value = (*addr) & ((~0ul) << offset);
-		return (value == 0) ? BITS_PER_LONG : __ffs(value);
+		if (value == 0)
+			return (size < BITS_PER_LONG) ? size : BITS_PER_LONG;
+		else
+			return __ffs(value);
 	}
 
 	/* size is not constant or too big */
@@ -229,22 +220,16 @@ find_next_zero_bit(const unsigned long *addr, unsigned long size,
 {
 	unsigned long value;
 
-	/* Avoid a function call if the bitmap size is a constant */
-	/* and not bigger than BITS_PER_LONG. */
-
-	/* insert a sentinel so that __ffs returns size if there */
-	/* are no set bits in the bitmap */
-	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
-		value = (~(*addr)) & ((~0ul) << offset);
-		value |= (1ul << size);
-		return __ffs(value);
-	}
-
-	/* the result of __ffs(0) is undefined, so it needs to be */
-	/* handled separately */
-	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
+	/*
+	 * Avoid a function call if the bitmap size is a constant and not
+	 * bigger than BITS_PER_LONG.  Ensure we return size if all bits set.
+	 */
+	if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) {
 		value = (~(*addr)) & ((~0ul) << offset);
-		return (value == 0) ? BITS_PER_LONG : __ffs(value);
+		if (value == 0)
+			return (size < BITS_PER_LONG) ? size : BITS_PER_LONG;
+		else
+			return __ffs(value);
 	}
 
 	/* size is not constant or too big */
-- 
1.5.5.1.270.g89765




^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-27 20:19 [PATCH] bitops: simplify generic bit finding functions Harvey Harrison
@ 2008-04-27 20:26 ` Linus Torvalds
  2008-04-27 20:29   ` Harvey Harrison
                     ` (2 more replies)
  2008-04-28 14:32 ` Alexander van Heukelum
  1 sibling, 3 replies; 27+ messages in thread
From: Linus Torvalds @ 2008-04-27 20:26 UTC (permalink / raw)
  To: Harvey Harrison; +Cc: Ingo Molnar, Andrew Morton, LKML



On Sun, 27 Apr 2008, Harvey Harrison wrote:
>
> No need for a sentinal if we explicitly catch the no bits/all bits set
> cases, make it clear they are special cases returning size/BITS_PER_LONG.

These things need to be re-done anyway. David already complained that the 
thing inlines to something much much too big.

Let's just make it not be an inline at all.

		Linus

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-27 20:26 ` Linus Torvalds
@ 2008-04-27 20:29   ` Harvey Harrison
  2008-04-27 20:36     ` Al Viro
  2008-04-27 20:38     ` Linus Torvalds
  2008-04-28 14:04   ` Thomas Gleixner
  2008-04-28 14:58   ` Alexander van Heukelum
  2 siblings, 2 replies; 27+ messages in thread
From: Harvey Harrison @ 2008-04-27 20:29 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Ingo Molnar, Andrew Morton, LKML

On Sun, 2008-04-27 at 13:26 -0700, Linus Torvalds wrote:
> 
> On Sun, 27 Apr 2008, Harvey Harrison wrote:
> >
> > No need for a sentinal if we explicitly catch the no bits/all bits set
> > cases, make it clear they are special cases returning size/BITS_PER_LONG.
> 
> These things need to be re-done anyway. David already complained that the 
> thing inlines to something much much too big.
> 
> Let's just make it not be an inline at all.

Oh, I didn't realize, I only did this because sparse started spewing out
lots of:
include/linux/bitops.h:166:32: warning: shift too big (65536) for type unsigned long

due to shift by size there, and again on line 202...I just wanted something
that sparse wouldn't warn about and was a little easier to understand to boot.

Cheers,

Harvey


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-27 20:29   ` Harvey Harrison
@ 2008-04-27 20:36     ` Al Viro
  2008-04-27 20:38       ` Harvey Harrison
  2008-04-27 20:38     ` Linus Torvalds
  1 sibling, 1 reply; 27+ messages in thread
From: Al Viro @ 2008-04-27 20:36 UTC (permalink / raw)
  To: Harvey Harrison
  Cc: Linus Torvalds, Ingo Molnar, Andrew Morton, LKML, linux-sparse

On Sun, Apr 27, 2008 at 01:29:21PM -0700, Harvey Harrison wrote:

> Oh, I didn't realize, I only did this because sparse started spewing out
> lots of:
> include/linux/bitops.h:166:32: warning: shift too big (65536) for type unsigned long
> 
> due to shift by size there, and again on line 202...I just wanted something
> that sparse wouldn't warn about and was a little easier to understand to boot.

That's a sparse problem, really.  I wonder if we simply should introduce a
new node type: EXPR_WARN.  So that expand would generate those from things
like division by zero/overflow/bad shift *and* emitting an insn for those
would generate a stored warning.

Objections?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-27 20:36     ` Al Viro
@ 2008-04-27 20:38       ` Harvey Harrison
  0 siblings, 0 replies; 27+ messages in thread
From: Harvey Harrison @ 2008-04-27 20:38 UTC (permalink / raw)
  To: Al Viro; +Cc: Linus Torvalds, Ingo Molnar, Andrew Morton, LKML, linux-sparse

On Sun, 2008-04-27 at 21:36 +0100, Al Viro wrote:
> On Sun, Apr 27, 2008 at 01:29:21PM -0700, Harvey Harrison wrote:
> 
> > Oh, I didn't realize, I only did this because sparse started spewing out
> > lots of:
> > include/linux/bitops.h:166:32: warning: shift too big (65536) for type unsigned long
> > 
> > due to shift by size there, and again on line 202...I just wanted something
> > that sparse wouldn't warn about and was a little easier to understand to boot.
> 
> That's a sparse problem, really.  I wonder if we simply should introduce a
> new node type: EXPR_WARN.  So that expand would generate those from things
> like division by zero/overflow/bad shift *and* emitting an insn for those
> would generate a stored warning.

Well, even though it is a sparse problem, I think my revised version was
cleaner code anyway.

> 
> Objections?

None here

Harvey


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-27 20:29   ` Harvey Harrison
  2008-04-27 20:36     ` Al Viro
@ 2008-04-27 20:38     ` Linus Torvalds
  2008-04-27 21:02       ` Al Viro
  1 sibling, 1 reply; 27+ messages in thread
From: Linus Torvalds @ 2008-04-27 20:38 UTC (permalink / raw)
  To: Harvey Harrison; +Cc: Ingo Molnar, Andrew Morton, LKML



On Sun, 27 Apr 2008, Harvey Harrison wrote:
>
> Oh, I didn't realize, I only did this because sparse started spewing out
> lots of:
> include/linux/bitops.h:166:32: warning: shift too big (65536) for type unsigned long

Well, that's really just a sparse bug/misfeature that didn't matter 
before.

It happens because the warning is done as part of constant expression 
evaluation, but then the expression isn't actually *used*, so when we 
optimize it away - at a later date - it's too late to undo the warning.

I don't rightly know how to fix it. We do want to do the constant 
evaluation early (since all the optimizations that may then make it a 
non-issue depends on constants being constants!), but in order to not 
output the warning we'd have to turn the constant into a "constant with 
warning 'xyz' if used".

Which we have no support for in sparse yet.

		Linus

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-27 20:38     ` Linus Torvalds
@ 2008-04-27 21:02       ` Al Viro
  0 siblings, 0 replies; 27+ messages in thread
From: Al Viro @ 2008-04-27 21:02 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML

On Sun, Apr 27, 2008 at 01:38:34PM -0700, Linus Torvalds wrote:

> It happens because the warning is done as part of constant expression 
> evaluation, but then the expression isn't actually *used*, so when we 
> optimize it away - at a later date - it's too late to undo the warning.
> 
> I don't rightly know how to fix it. We do want to do the constant 
> evaluation early (since all the optimizations that may then make it a 
> non-issue depends on constants being constants!), but in order to not 
> output the warning we'd have to turn the constant into a "constant with 
> warning 'xyz' if used".
> 
> Which we have no support for in sparse yet.

It's not even a constant, really...  What we need is
	* don't fold further if that sucker would be evaluated (i.e. 0 && <...>
is folded, 0 & <...> is not)
	* don't consider an integer constant expression with value 0 for
purposes of null pointer constant handling
	* emit corresponding insn in linearize_expression() when we run into
such sucker.
	* somewhere around the call of vrfy_flow() walk through insns in
remaining bb (we'd done dead code elimination already) and spew postponed
warning on each such insn.  Probably.  Assuming that I'm not entirely
confused about what's going on in that area - which is quite possible.

Folks, could somebody familiar with the backend comment on the last part?

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-27 20:26 ` Linus Torvalds
  2008-04-27 20:29   ` Harvey Harrison
@ 2008-04-28 14:04   ` Thomas Gleixner
  2008-04-28 15:10     ` Alexander van Heukelum
  2008-04-28 16:25     ` Linus Torvalds
  2008-04-28 14:58   ` Alexander van Heukelum
  2 siblings, 2 replies; 27+ messages in thread
From: Thomas Gleixner @ 2008-04-28 14:04 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller

On Sun, 27 Apr 2008, Linus Torvalds wrote:
> On Sun, 27 Apr 2008, Harvey Harrison wrote:
> >
> > No need for a sentinal if we explicitly catch the no bits/all bits set
> > cases, make it clear they are special cases returning size/BITS_PER_LONG.
> 
> These things need to be re-done anyway. David already complained that the 
> thing inlines to something much much too big.

The delta between the inlined version including the constant
optimizations and the original non inlined version is exactly 1400
bytes on a sparc64 defconfig build.

> Let's just make it not be an inline at all.

Which makes the whole constant optimization moot.

How about making the optimization controlled by a config switch so
arch maintainers can decide whether they want to enable the constant
optimization or unconditionally call the lib function ?

See patch below. It gives back the 1400 bytes on SPARC64 and other
platforms that have no instruction for find bit.

Thanks,
	tglx

---------->

Subject: bitops: optional bitops mapsize optimization on config switch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Apr 2008 00:22:06 +0200

The mapsize optimizations which were moved from x86 to the generic
code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the
binary size on non x86 architectures.

Make the optimization depend on a config switch so architecture
maintainers can decide.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>

---
 arch/x86/Kconfig.cpu   |    1 +
 include/linux/bitops.h |    6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

Index: linux-2.6/arch/x86/Kconfig.cpu
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig.cpu
+++ linux-2.6/arch/x86/Kconfig.cpu
@@ -282,6 +282,7 @@ config X86_CPU
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_FIND_NEXT_BIT
+	select GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE
 
 config X86_GENERIC
 	bool "Generic x86 support"
Index: linux-2.6/include/linux/bitops.h
===================================================================
--- linux-2.6.orig/include/linux/bitops.h
+++ linux-2.6/include/linux/bitops.h
@@ -190,6 +190,7 @@ static __always_inline unsigned long
 find_next_bit(const unsigned long *addr, unsigned long size,
 		unsigned long offset)
 {
+#ifdef CONFIG_GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE
 	unsigned long value;
 
 	/* Avoid a function call if the bitmap size is a constant */
@@ -209,7 +210,7 @@ find_next_bit(const unsigned long *addr,
 		value = (*addr) & ((~0ul) << offset);
 		return (value == 0) ? BITS_PER_LONG : __ffs(value);
 	}
-
+#endif
 	/* size is not constant or too big */
 	return __find_next_bit(addr, size, offset);
 }
@@ -227,6 +228,7 @@ static __always_inline unsigned long
 find_next_zero_bit(const unsigned long *addr, unsigned long size,
 		unsigned long offset)
 {
+#ifdef CONFIG_GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE
 	unsigned long value;
 
 	/* Avoid a function call if the bitmap size is a constant */
@@ -246,7 +248,7 @@ find_next_zero_bit(const unsigned long *
 		value = (~(*addr)) & ((~0ul) << offset);
 		return (value == 0) ? BITS_PER_LONG : __ffs(value);
 	}
-
+#endif
 	/* size is not constant or too big */
 	return __find_next_zero_bit(addr, size, offset);
 }

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-27 20:19 [PATCH] bitops: simplify generic bit finding functions Harvey Harrison
  2008-04-27 20:26 ` Linus Torvalds
@ 2008-04-28 14:32 ` Alexander van Heukelum
  1 sibling, 0 replies; 27+ messages in thread
From: Alexander van Heukelum @ 2008-04-28 14:32 UTC (permalink / raw)
  To: Harvey Harrison, Ingo Molnar, Linus Torvalds; +Cc: Andrew Morton, LKML

Hi,

Ingo just pointed me to this thread.

On Sun, 27 Apr 2008 13:19:51 -0700, "Harvey Harrison"
<harvey.harrison@gmail.com> said:
> No need for a sentinal if we explicitly catch the no bits/all bits set
> cases, make it clear they are special cases returning size/BITS_PER_LONG.
> 
> Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
> ---
>  include/linux/bitops.h |   93
>  ++++++++++++++++++++----------------------------
>  1 files changed, 39 insertions(+), 54 deletions(-)
> 
> diff --git a/include/linux/bitops.h b/include/linux/bitops.h
> index 48bde60..d9eb58a 100644
> --- a/include/linux/bitops.h
> +++ b/include/linux/bitops.h
> @@ -127,18 +127,17 @@ extern unsigned long __find_first_bit(const
> unsigned long *addr,
>  static __always_inline unsigned long
>  find_first_bit(const unsigned long *addr, unsigned long size)
>  {
> -	/* Avoid a function call if the bitmap size is a constant */
> -	/* and not bigger than BITS_PER_LONG. */
> -
> -	/* insert a sentinel so that __ffs returns size if there */
> -	/* are no set bits in the bitmap */
> -	if (__builtin_constant_p(size) && (size < BITS_PER_LONG))
> -		return __ffs((*addr) | (1ul << size));
> -
> -	/* the result of __ffs(0) is undefined, so it needs to be */
> -	/* handled separately */
> -	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
> -		return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr);
> +	/*
> +	 * Avoid a function call if the bitmap size is a constant and not
> +	 * bigger than BITS_PER_LONG.  Ensure we return size if there are
> +	 * no set bits.
> +	 */
> +	if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) {
> +		if (*addr == 0)
> +			return (size < BITS_PER_LONG) ? size : BITS_PER_LONG;
> +		else
> +			return __ffs(*addr);
> +	}

Ehm... assume size=16 and *addr=0x80000000
    then __ffs(*addr) = 31. Not 16.

>  	/* size is not constant or too big */
>  	return __find_first_bit(addr, size);
> @@ -157,20 +156,17 @@ extern unsigned long __find_first_zero_bit(const
> unsigned long *addr,
>  static __always_inline unsigned long
>  find_first_zero_bit(const unsigned long *addr, unsigned long size)
>  {
> -	/* Avoid a function call if the bitmap size is a constant */
> -	/* and not bigger than BITS_PER_LONG. */
> -
> -	/* insert a sentinel so that __ffs returns size if there */
> -	/* are no set bits in the bitmap */
> -	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
> -		return __ffs(~(*addr) | (1ul << size));
> +	/*
> +	 * Avoid a function call if the bitmap size is a constant and not
> +	 * bigger than BITS_PER_LONG.  Ensure we return size if all bits set.
> +	 */
> +	if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) {
> +		if ((~(*addr)) == 0)
> +			return (size < BITS_PER_LONG) ? size : BITS_PER_LONG;
> +		else
> +			return __ffs(~(*addr));
>  	}
>  
> -	/* the result of __ffs(0) is undefined, so it needs to be */
> -	/* handled separately */
> -	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
> -		return (~(*addr) == 0) ? BITS_PER_LONG : __ffs(~(*addr));
> -
>  	/* size is not constant or too big */
>  	return __find_first_zero_bit(addr, size);
>  }
> @@ -192,22 +188,17 @@ find_next_bit(const unsigned long *addr, unsigned
> long size,
>  {
>  	unsigned long value;
>  
> -	/* Avoid a function call if the bitmap size is a constant */
> -	/* and not bigger than BITS_PER_LONG. */
> -
> -	/* insert a sentinel so that __ffs returns size if there */
> -	/* are no set bits in the bitmap */
> -	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
> +	/*
> +	 * Avoid a function call if the bitmap size is a constant and not
> +	 * bigger than BITS_PER_LONG.  Ensure we return size if there are
> +	 * no set bits.
> +	 */
> +	if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) {
>  		value = (*addr) & ((~0ul) << offset);
> -		value |= (1ul << size);
> -		return __ffs(value);
> -	}
> -
> -	/* the result of __ffs(0) is undefined, so it needs to be */
> -	/* handled separately */
> -	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
> -		value = (*addr) & ((~0ul) << offset);
> -		return (value == 0) ? BITS_PER_LONG : __ffs(value);
> +		if (value == 0)
> +			return (size < BITS_PER_LONG) ? size : BITS_PER_LONG;
> +		else
> +			return __ffs(value);
>  	}
>  
>  	/* size is not constant or too big */
> @@ -229,22 +220,16 @@ find_next_zero_bit(const unsigned long *addr,
> unsigned long size,
>  {
>  	unsigned long value;
>  
> -	/* Avoid a function call if the bitmap size is a constant */
> -	/* and not bigger than BITS_PER_LONG. */
> -
> -	/* insert a sentinel so that __ffs returns size if there */
> -	/* are no set bits in the bitmap */
> -	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
> -		value = (~(*addr)) & ((~0ul) << offset);
> -		value |= (1ul << size);
> -		return __ffs(value);
> -	}
> -
> -	/* the result of __ffs(0) is undefined, so it needs to be */
> -	/* handled separately */
> -	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
> +	/*
> +	 * Avoid a function call if the bitmap size is a constant and not
> +	 * bigger than BITS_PER_LONG.  Ensure we return size if all bits set.
> +	 */
> +	if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) {
>  		value = (~(*addr)) & ((~0ul) << offset);
> -		return (value == 0) ? BITS_PER_LONG : __ffs(value);
> +		if (value == 0)
> +			return (size < BITS_PER_LONG) ? size : BITS_PER_LONG;
> +		else
> +			return __ffs(value);
>  	}
>  
>  	/* size is not constant or too big */
> -- 
> 1.5.5.1.270.g89765
-- 
  Alexander van Heukelum
  heukelum@fastmail.fm

-- 
http://www.fastmail.fm - mmm... Fastmail...


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-27 20:26 ` Linus Torvalds
  2008-04-27 20:29   ` Harvey Harrison
  2008-04-28 14:04   ` Thomas Gleixner
@ 2008-04-28 14:58   ` Alexander van Heukelum
  2 siblings, 0 replies; 27+ messages in thread
From: Alexander van Heukelum @ 2008-04-28 14:58 UTC (permalink / raw)
  To: Linus Torvalds, Harvey Harrison, David S. Miller
  Cc: Ingo Molnar, Andrew Morton, LKML

On Sun, 27 Apr 2008 13:26:34 -0700 (PDT), "Linus Torvalds"
<torvalds@linux-foundation.org> said:
> On Sun, 27 Apr 2008, Harvey Harrison wrote:
> >
> > No need for a sentinal if we explicitly catch the no bits/all bits set
> > cases, make it clear they are special cases returning size/BITS_PER_LONG.
> 
> These things need to be re-done anyway. David already complained that the 
> thing inlines to something much much too big.
> 
> Let's just make it not be an inline at all.

Hello,

I think that's the wrong way to go about it. The problem that David
Miller pointed out was that the small-bitmap optimization caused an
inliningdisaster. Apparently, the generic version of __ffs is very
big on sparc64. In my opinion the implementation of __ffs should be
made out of line. If you move the wrapper of find_first_bit etc
out of line, all possibilities of optimization will be destroyed.
Better to remove them completely, then.

Greetings,
    Alexander

> 		Linus
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel"
> in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
> 
-- 
  Alexander van Heukelum
  heukelum@fastmail.fm

-- 
http://www.fastmail.fm - Accessible with your email software
                          or over the web


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-28 14:04   ` Thomas Gleixner
@ 2008-04-28 15:10     ` Alexander van Heukelum
  2008-04-28 15:58       ` Thomas Gleixner
  2008-04-28 16:25     ` Linus Torvalds
  1 sibling, 1 reply; 27+ messages in thread
From: Alexander van Heukelum @ 2008-04-28 15:10 UTC (permalink / raw)
  To: Thomas Gleixner, Linus Torvalds
  Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller

> See patch below. It gives back the 1400 bytes on SPARC64 and other
> platforms that have no instruction for find bit.

Hi,

I, personally, see this patch as a stopgap measure. The real problem
is that the generic __ffs is inlined and ends up generating a lot
of instructions.

Until the generic implementation of __ffs is fixed not to be an
enormous inline function, this seems to be a reasonable thing
to do, though it's a shame that the optimization is now default-
off for architectures with a good implementation of __ffs.

Greetings,
    Alexander

> Thanks,
> 	tglx
> 
> ---------->
> 
> Subject: bitops: optional bitops mapsize optimization on config switch
> From: Thomas Gleixner <tglx@linutronix.de>
> Date: Mon, 28 Apr 2008 00:22:06 +0200
> 
> The mapsize optimizations which were moved from x86 to the generic
> code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the
> binary size on non x86 architectures.
> 
> Make the optimization depend on a config switch so architecture
> maintainers can decide.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Acked-by: Ingo Molnar <mingo@elte.hu>
> 
> ---
>  arch/x86/Kconfig.cpu   |    1 +
>  include/linux/bitops.h |    6 ++++--
>  2 files changed, 5 insertions(+), 2 deletions(-)
> 
> Index: linux-2.6/arch/x86/Kconfig.cpu
> ===================================================================
> --- linux-2.6.orig/arch/x86/Kconfig.cpu
> +++ linux-2.6/arch/x86/Kconfig.cpu
> @@ -282,6 +282,7 @@ config X86_CPU
>  	def_bool y
>  	select GENERIC_FIND_FIRST_BIT
>  	select GENERIC_FIND_NEXT_BIT
> +	select GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE
>  
>  config X86_GENERIC
>  	bool "Generic x86 support"
> Index: linux-2.6/include/linux/bitops.h
> ===================================================================
> --- linux-2.6.orig/include/linux/bitops.h
> +++ linux-2.6/include/linux/bitops.h
> @@ -190,6 +190,7 @@ static __always_inline unsigned long
>  find_next_bit(const unsigned long *addr, unsigned long size,
>  		unsigned long offset)
>  {
> +#ifdef CONFIG_GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE
>  	unsigned long value;
>  
>  	/* Avoid a function call if the bitmap size is a constant */
> @@ -209,7 +210,7 @@ find_next_bit(const unsigned long *addr,
>  		value = (*addr) & ((~0ul) << offset);
>  		return (value == 0) ? BITS_PER_LONG : __ffs(value);
>  	}
> -
> +#endif
>  	/* size is not constant or too big */
>  	return __find_next_bit(addr, size, offset);
>  }
> @@ -227,6 +228,7 @@ static __always_inline unsigned long
>  find_next_zero_bit(const unsigned long *addr, unsigned long size,
>  		unsigned long offset)
>  {
> +#ifdef CONFIG_GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE
>  	unsigned long value;
>  
>  	/* Avoid a function call if the bitmap size is a constant */
> @@ -246,7 +248,7 @@ find_next_zero_bit(const unsigned long *
>  		value = (~(*addr)) & ((~0ul) << offset);
>  		return (value == 0) ? BITS_PER_LONG : __ffs(value);
>  	}
> -
> +#endif
>  	/* size is not constant or too big */
>  	return __find_next_zero_bit(addr, size, offset);
>  }
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel"
> in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
> 
-- 
  Alexander van Heukelum
  heukelum@fastmail.fm

-- 
http://www.fastmail.fm - mmm... Fastmail...


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-28 15:10     ` Alexander van Heukelum
@ 2008-04-28 15:58       ` Thomas Gleixner
  0 siblings, 0 replies; 27+ messages in thread
From: Thomas Gleixner @ 2008-04-28 15:58 UTC (permalink / raw)
  To: Alexander van Heukelum
  Cc: Linus Torvalds, Harvey Harrison, Ingo Molnar, Andrew Morton, LKML,
	David Miller

On Mon, 28 Apr 2008, Alexander van Heukelum wrote:
> > See patch below. It gives back the 1400 bytes on SPARC64 and other
> > platforms that have no instruction for find bit.
> 
> Hi,
> 
> I, personally, see this patch as a stopgap measure. The real problem
> is that the generic __ffs is inlined and ends up generating a lot
> of instructions.
> 
> Until the generic implementation of __ffs is fixed not to be an
> enormous inline function, this seems to be a reasonable thing
> to do, though it's a shame that the optimization is now default-
> off for architectures with a good implementation of __ffs.

Yup, it hasn't been there before and we explicitely enable it on
x86. If there are other archs which have a good one it's a nobrainer
to enable it.

Thanks,
	tglx


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-28 14:04   ` Thomas Gleixner
  2008-04-28 15:10     ` Alexander van Heukelum
@ 2008-04-28 16:25     ` Linus Torvalds
  2008-04-28 16:47       ` Thomas Gleixner
  1 sibling, 1 reply; 27+ messages in thread
From: Linus Torvalds @ 2008-04-28 16:25 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller



On Mon, 28 Apr 2008, Thomas Gleixner wrote:
>
> How about making the optimization controlled by a config switch so
> arch maintainers can decide whether they want to enable the constant
> optimization or unconditionally call the lib function ?

No.

This is just making that damn header line look worse and worse.

Is there a _reason_ to optimize this stupid function this way?

		Linus

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-28 16:25     ` Linus Torvalds
@ 2008-04-28 16:47       ` Thomas Gleixner
  2008-04-28 16:54         ` Linus Torvalds
  0 siblings, 1 reply; 27+ messages in thread
From: Thomas Gleixner @ 2008-04-28 16:47 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller

On Mon, 28 Apr 2008, Linus Torvalds wrote:
> >
> > How about making the optimization controlled by a config switch so
> > arch maintainers can decide whether they want to enable the constant
> > optimization or unconditionally call the lib function ?
> 
> No.
> 
> This is just making that damn header line look worse and worse.
> 
> Is there a _reason_ to optimize this stupid function this way?

On architectures which have a find bit instruction we can replace the
call to the library function by a single instruction when the size of
the bitmap is less/equal bits per long and when the bitnr is a
constant.

Thanks,
	tglx



^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-28 16:47       ` Thomas Gleixner
@ 2008-04-28 16:54         ` Linus Torvalds
  2008-04-28 19:26           ` Thomas Gleixner
  2008-04-28 19:57           ` [PATCH] bitops: simplify generic bit finding functions Andi Kleen
  0 siblings, 2 replies; 27+ messages in thread
From: Linus Torvalds @ 2008-04-28 16:54 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller



On Mon, 28 Apr 2008, Thomas Gleixner wrote:

> On Mon, 28 Apr 2008, Linus Torvalds wrote:
> > >
> > > How about making the optimization controlled by a config switch so
> > > arch maintainers can decide whether they want to enable the constant
> > > optimization or unconditionally call the lib function ?
> > 
> > No.
> > 
> > This is just making that damn header line look worse and worse.
> > 
> > Is there a _reason_ to optimize this stupid function this way?
> 
> On architectures which have a find bit instruction we can replace the
> call to the library function by a single instruction when the size of
> the bitmap is less/equal bits per long and when the bitnr is a
> constant.

That's not what I asked.

I asked whether there is a *reason* to optimize this and cause all these 
stupid problems.

Is there a real hot path anywhere that actually uses this and depends on 
it?

		Linus

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-28 16:54         ` Linus Torvalds
@ 2008-04-28 19:26           ` Thomas Gleixner
  2008-04-28 19:37             ` Linus Torvalds
  2008-04-28 19:57           ` [PATCH] bitops: simplify generic bit finding functions Andi Kleen
  1 sibling, 1 reply; 27+ messages in thread
From: Thomas Gleixner @ 2008-04-28 19:26 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller

On Mon, 28 Apr 2008, Linus Torvalds wrote:
> > > > How about making the optimization controlled by a config switch so
> > > > arch maintainers can decide whether they want to enable the constant
> > > > optimization or unconditionally call the lib function ?
> > > 
> > > No.
> > > 
> > > This is just making that damn header line look worse and worse.
> > > 
> > > Is there a _reason_ to optimize this stupid function this way?
> > 
> > On architectures which have a find bit instruction we can replace the
> > call to the library function by a single instruction when the size of
> > the bitmap is less/equal bits per long and when the bitnr is a
> > constant.
> 
> That's not what I asked.
> 
> I asked whether there is a *reason* to optimize this and cause all these 
> stupid problems.
> 
> Is there a real hot path anywhere that actually uses this and depends on 
> it?

Sorry, misunderstood your question. 

I checked the use cases and have not found a single one for
find_next_(zero_)bit() which makes use of this micro optimization. In
fact the .text section of vmlinux of the "optimized" and the straight
function call are identical. So the effect of those micro
optimizations is exactly zero.

find_first_(zero_)bit() has a couple of places where the optimization
hits, but the code size reduction is mere 21 bytes and the use cases
are not in real hot pathes AFAICT.

I doubt that that is worth the trouble and we should just remove those
inlines alltogether. This was discussed before, but Andi objected to
remove those micro optimizations and nobody had time to actually
verify the real benefits.

I'll whip up a patch.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-28 19:26           ` Thomas Gleixner
@ 2008-04-28 19:37             ` Linus Torvalds
  2008-04-28 21:55               ` Ingo Molnar
  2008-04-29 10:01               ` [PATCH] bitops: remove "optimizations" Thomas Gleixner
  0 siblings, 2 replies; 27+ messages in thread
From: Linus Torvalds @ 2008-04-28 19:37 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller



On Mon, 28 Apr 2008, Thomas Gleixner wrote:
> 
> I doubt that that is worth the trouble and we should just remove those
> inlines alltogether. This was discussed before, but Andi objected to
> remove those micro optimizations and nobody had time to actually
> verify the real benefits.

Thanks for checking.

In general, I think we should act the other way: we should *assume* that 
inlines and complex macros in header files are not worth it and we'd be 
better off with just plain function calls, unless it's really obvious that 
the inline function is always worth it.

We have tons of good examples of inline functions in headers that really 
*really* want to be that way: <linux/list.h> is generally a collection of 
things that clearly expand to somthing that is (a) easily critical and (b) 
inlining things generally really causes a smaller code footprint than 
having a "call" instruction and all the argument setup - regardless of how 
many times it is done.

So we do have cases where the inlines are obviously worth it. But in 
general, I think we should try to move things from the header files into 
*.c files unless there is a really clear reason for keeping it that way.

(Some inline functions do depend on things like constant arguments goign 
away, with kmalloc() being a good example of some really nasty header file 
tricks that are likely worth it despite their extreme ugliness)

		Linus

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-28 16:54         ` Linus Torvalds
  2008-04-28 19:26           ` Thomas Gleixner
@ 2008-04-28 19:57           ` Andi Kleen
  1 sibling, 0 replies; 27+ messages in thread
From: Andi Kleen @ 2008-04-28 19:57 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Thomas Gleixner, Harvey Harrison, Ingo Molnar, Andrew Morton,
	LKML, David Miller

Linus Torvalds <torvalds@linux-foundation.org> writes:
>
> Is there a real hot path anywhere that actually uses this and depends on 
> it?

A long time ago during profiling I found that in the scheduler back then
for_each_cpu() with find_next_bit() was somewhat hot. But I'm not sure it 
still would be in the new scheduler anyways and the test case a little
dumb anyways (overscheduling user space)

Still I think for_each_cpu() should be reasonably stream lined code.

-Andi

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: simplify generic bit finding functions
  2008-04-28 19:37             ` Linus Torvalds
@ 2008-04-28 21:55               ` Ingo Molnar
  2008-04-29 10:01               ` [PATCH] bitops: remove "optimizations" Thomas Gleixner
  1 sibling, 0 replies; 27+ messages in thread
From: Ingo Molnar @ 2008-04-28 21:55 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Thomas Gleixner, Harvey Harrison, Andrew Morton, LKML,
	David Miller


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> So we do have cases where the inlines are obviously worth it. But in 
> general, I think we should try to move things from the header files 
> into *.c files unless there is a really clear reason for keeping it 
> that way.

there's another benefit, and in asm-x86 we prefer to move inlines to .c 
files even in borderline cases because it simplifies the type 
dependencies: not having to fully define all types at the function 
prototype site avoids include file dependency hell.

Putting things like a task struct dereference into a lowlevel inline 
file easily causes dependency problems that causes people to use macros 
instead - which have their own set of readability and side-effect 
problems.

a third argument is that inlines seldom get smaller. So if they are 
borderline and we move them into a .c, and later on the function gets 
larger, no harm is done. But if we keep the inline in a .h in the 
borderline case and we grow the inline later on, the whole kernel bloats 
in a multiplied way, without any apparent direct feedback to the 
developer that something wrong and harmful just happened.

	Ingo

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH] bitops: remove "optimizations"
  2008-04-28 19:37             ` Linus Torvalds
  2008-04-28 21:55               ` Ingo Molnar
@ 2008-04-29 10:01               ` Thomas Gleixner
  2008-04-29 10:03                 ` David Miller
  1 sibling, 1 reply; 27+ messages in thread
From: Thomas Gleixner @ 2008-04-29 10:01 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller

The mapsize optimizations which were moved from x86 to the generic
code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the
binary size on non x86 architectures.

Looking into the real effects of the "optimizations" it turned out
that they are not used in find_next_bit() and find_next_zero_bit().

The ones in find_first_bit() and find_first_zero_bit() are used in a
couple of places but none of them is a real hot path.

Remove the "optimizations" all together and call the library functions
unconditionally.

Boot-tested on x86 and compile tested on every cross compiler I have.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 include/linux/bitops.h |  115 +++++--------------------------------------------
 lib/find_next_bit.c    |   22 ++++-----
 2 files changed, 22 insertions(+), 115 deletions(-)

Index: linux-2.6/include/linux/bitops.h
===================================================================
--- linux-2.6.orig/include/linux/bitops.h
+++ linux-2.6/include/linux/bitops.h
@@ -114,8 +114,6 @@ static inline unsigned fls_long(unsigned
 
 #ifdef __KERNEL__
 #ifdef CONFIG_GENERIC_FIND_FIRST_BIT
-extern unsigned long __find_first_bit(const unsigned long *addr,
-		unsigned long size);
 
 /**
  * find_first_bit - find the first set bit in a memory region
@@ -124,28 +122,8 @@ extern unsigned long __find_first_bit(co
  *
  * Returns the bit number of the first set bit.
  */
-static __always_inline unsigned long
-find_first_bit(const unsigned long *addr, unsigned long size)
-{
-	/* Avoid a function call if the bitmap size is a constant */
-	/* and not bigger than BITS_PER_LONG. */
-
-	/* insert a sentinel so that __ffs returns size if there */
-	/* are no set bits in the bitmap */
-	if (__builtin_constant_p(size) && (size < BITS_PER_LONG))
-		return __ffs((*addr) | (1ul << size));
-
-	/* the result of __ffs(0) is undefined, so it needs to be */
-	/* handled separately */
-	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
-		return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr);
-
-	/* size is not constant or too big */
-	return __find_first_bit(addr, size);
-}
-
-extern unsigned long __find_first_zero_bit(const unsigned long *addr,
-		unsigned long size);
+extern unsigned long find_first_bit(const unsigned long *addr,
+				    unsigned long size);
 
 /**
  * find_first_zero_bit - find the first cleared bit in a memory region
@@ -154,31 +132,12 @@ extern unsigned long __find_first_zero_b
  *
  * Returns the bit number of the first cleared bit.
  */
-static __always_inline unsigned long
-find_first_zero_bit(const unsigned long *addr, unsigned long size)
-{
-	/* Avoid a function call if the bitmap size is a constant */
-	/* and not bigger than BITS_PER_LONG. */
-
-	/* insert a sentinel so that __ffs returns size if there */
-	/* are no set bits in the bitmap */
-	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
-		return __ffs(~(*addr) | (1ul << size));
-	}
-
-	/* the result of __ffs(0) is undefined, so it needs to be */
-	/* handled separately */
-	if (__builtin_constant_p(size) && (size == BITS_PER_LONG))
-		return (~(*addr) == 0) ? BITS_PER_LONG : __ffs(~(*addr));
-
-	/* size is not constant or too big */
-	return __find_first_zero_bit(addr, size);
-}
+extern unsigned long find_first_zero_bit(const unsigned long *addr,
+					 unsigned long size);
+
 #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
 
 #ifdef CONFIG_GENERIC_FIND_NEXT_BIT
-extern unsigned long __find_next_bit(const unsigned long *addr,
-		unsigned long size, unsigned long offset);
 
 /**
  * find_next_bit - find the next set bit in a memory region
@@ -186,36 +145,8 @@ extern unsigned long __find_next_bit(con
  * @offset: The bitnumber to start searching at
  * @size: The bitmap size in bits
  */
-static __always_inline unsigned long
-find_next_bit(const unsigned long *addr, unsigned long size,
-		unsigned long offset)
-{
-	unsigned long value;
-
-	/* Avoid a function call if the bitmap size is a constant */
-	/* and not bigger than BITS_PER_LONG. */
-
-	/* insert a sentinel so that __ffs returns size if there */
-	/* are no set bits in the bitmap */
-	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
-		value = (*addr) & ((~0ul) << offset);
-		value |= (1ul << size);
-		return __ffs(value);
-	}
-
-	/* the result of __ffs(0) is undefined, so it needs to be */
-	/* handled separately */
-	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
-		value = (*addr) & ((~0ul) << offset);
-		return (value == 0) ? BITS_PER_LONG : __ffs(value);
-	}
-
-	/* size is not constant or too big */
-	return __find_next_bit(addr, size, offset);
-}
-
-extern unsigned long __find_next_zero_bit(const unsigned long *addr,
-		unsigned long size, unsigned long offset);
+extern unsigned long find_next_bit(const unsigned long *addr,
+				   unsigned long size, unsigned long offset);
 
 /**
  * find_next_zero_bit - find the next cleared bit in a memory region
@@ -223,33 +154,11 @@ extern unsigned long __find_next_zero_bi
  * @offset: The bitnumber to start searching at
  * @size: The bitmap size in bits
  */
-static __always_inline unsigned long
-find_next_zero_bit(const unsigned long *addr, unsigned long size,
-		unsigned long offset)
-{
-	unsigned long value;
-
-	/* Avoid a function call if the bitmap size is a constant */
-	/* and not bigger than BITS_PER_LONG. */
-
-	/* insert a sentinel so that __ffs returns size if there */
-	/* are no set bits in the bitmap */
-	if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) {
-		value = (~(*addr)) & ((~0ul) << offset);
-		value |= (1ul << size);
-		return __ffs(value);
-	}
-
-	/* the result of __ffs(0) is undefined, so it needs to be */
-	/* handled separately */
-	if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) {
-		value = (~(*addr)) & ((~0ul) << offset);
-		return (value == 0) ? BITS_PER_LONG : __ffs(value);
-	}
-
-	/* size is not constant or too big */
-	return __find_next_zero_bit(addr, size, offset);
-}
+
+extern unsigned long find_next_zero_bit(const unsigned long *addr,
+					unsigned long size,
+					unsigned long offset);
+
 #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */
 #endif /* __KERNEL__ */
 #endif
Index: linux-2.6/lib/find_next_bit.c
===================================================================
--- linux-2.6.orig/lib/find_next_bit.c
+++ linux-2.6/lib/find_next_bit.c
@@ -20,8 +20,8 @@
 /*
  * Find the next set bit in a memory region.
  */
-unsigned long __find_next_bit(const unsigned long *addr,
-		unsigned long size, unsigned long offset)
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
 {
 	const unsigned long *p = addr + BITOP_WORD(offset);
 	unsigned long result = offset & ~(BITS_PER_LONG-1);
@@ -58,14 +58,14 @@ found_first:
 found_middle:
 	return result + __ffs(tmp);
 }
-EXPORT_SYMBOL(__find_next_bit);
+EXPORT_SYMBOL(find_next_bit);
 
 /*
  * This implementation of find_{first,next}_zero_bit was stolen from
  * Linus' asm-alpha/bitops.h.
  */
-unsigned long __find_next_zero_bit(const unsigned long *addr,
-		unsigned long size, unsigned long offset)
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
 {
 	const unsigned long *p = addr + BITOP_WORD(offset);
 	unsigned long result = offset & ~(BITS_PER_LONG-1);
@@ -102,15 +102,14 @@ found_first:
 found_middle:
 	return result + ffz(tmp);
 }
-EXPORT_SYMBOL(__find_next_zero_bit);
+EXPORT_SYMBOL(find_next_zero_bit);
 #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */
 
 #ifdef CONFIG_GENERIC_FIND_FIRST_BIT
 /*
  * Find the first set bit in a memory region.
  */
-unsigned long __find_first_bit(const unsigned long *addr,
-		unsigned long size)
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
 {
 	const unsigned long *p = addr;
 	unsigned long result = 0;
@@ -131,13 +130,12 @@ unsigned long __find_first_bit(const uns
 found:
 	return result + __ffs(tmp);
 }
-EXPORT_SYMBOL(__find_first_bit);
+EXPORT_SYMBOL(find_first_bit);
 
 /*
  * Find the first cleared bit in a memory region.
  */
-unsigned long __find_first_zero_bit(const unsigned long *addr,
-		unsigned long size)
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
 {
 	const unsigned long *p = addr;
 	unsigned long result = 0;
@@ -158,7 +156,7 @@ unsigned long __find_first_zero_bit(cons
 found:
 	return result + ffz(tmp);
 }
-EXPORT_SYMBOL(__find_first_zero_bit);
+EXPORT_SYMBOL(find_first_zero_bit);
 #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
 
 #ifdef __BIG_ENDIAN

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: remove "optimizations"
  2008-04-29 10:01               ` [PATCH] bitops: remove "optimizations" Thomas Gleixner
@ 2008-04-29 10:03                 ` David Miller
  2008-04-29 12:34                   ` David Miller
  0 siblings, 1 reply; 27+ messages in thread
From: David Miller @ 2008-04-29 10:03 UTC (permalink / raw)
  To: tglx; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel

From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST)

> The mapsize optimizations which were moved from x86 to the generic
> code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the
> binary size on non x86 architectures.
> 
> Looking into the real effects of the "optimizations" it turned out
> that they are not used in find_next_bit() and find_next_zero_bit().
> 
> The ones in find_first_bit() and find_first_zero_bit() are used in a
> couple of places but none of them is a real hot path.
> 
> Remove the "optimizations" all together and call the library functions
> unconditionally.
> 
> Boot-tested on x86 and compile tested on every cross compiler I have.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: remove "optimizations"
  2008-04-29 10:03                 ` David Miller
@ 2008-04-29 12:34                   ` David Miller
  2008-04-29 14:20                     ` Ingo Molnar
  2008-04-29 16:51                     ` Thomas Gleixner
  0 siblings, 2 replies; 27+ messages in thread
From: David Miller @ 2008-04-29 12:34 UTC (permalink / raw)
  To: tglx; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel

From: David Miller <davem@davemloft.net>
Date: Tue, 29 Apr 2008 03:03:32 -0700 (PDT)

> From: Thomas Gleixner <tglx@linutronix.de>
> Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST)
> 
> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> 
> Acked-by: David S. Miller <davem@davemloft.net>

Ironically, I just bisected sparc64 bootup failures to the following
changeset.  It's very late here, and I haven't looked into the
details, but it seems to wedge in free_area_init_nodes() on a non-NUMA
system with CONFIG_NUMA disabled.

Isn't it funny that this optimization not only was useless, but also
broke things. :-/

64970b68d2b3ed32b964b0b30b1b98518fde388e is first bad commit
commit 64970b68d2b3ed32b964b0b30b1b98518fde388e
Author: Alexander van Heukelum <heukelum@mailshack.com>
Date:   Tue Mar 11 16:17:19 2008 +0100

    x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps
    
    This moves an optimization for searching constant-sized small
    bitmaps form x86_64-specific to generic code.
    
    On an i386 defconfig (the x86#testing one), the size of vmlinux hardly
    changes with this applied. I have observed only four places where this
    optimization avoids a call into find_next_bit:
    
    In the functions return_unused_surplus_pages, alloc_fresh_huge_page,
    and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap.
    In __next_cpu a call is avoided for a 32-bit bitmap. That's it.
    
    On x86_64, 52 locations are optimized with a minimal increase in
    code size:
    
    Current #testing defconfig:
        146 x bsf, 27 x find_next_*bit
       text    data     bss     dec     hex filename
       5392637  846592  724424 6963653  6a41c5 vmlinux
    
    After removing the x86_64 specific optimization for find_next_*bit:
        94 x bsf, 79 x find_next_*bit
       text    data     bss     dec     hex filename
       5392358  846592  724424 6963374  6a40ae vmlinux
    
    After this patch (making the optimization generic):
        146 x bsf, 27 x find_next_*bit
       text    data     bss     dec     hex filename
       5392396  846592  724424 6963412  6a40d4 vmlinux
    
    [ tglx@linutronix.de: build fixes ]
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

:040000 040000 c0407f7df7dc5333c78c300780931a269ae0dedd 2f6ef634a35b6dd46e40ea70128c39a742e60501 M      include
:040000 040000 556ee6ccb15cdbb1aa5a690c732a5492d58dfe6f 937d05977f46056c12b5da64ca62959c06a912c0 M      lib

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: remove "optimizations"
  2008-04-29 12:34                   ` David Miller
@ 2008-04-29 14:20                     ` Ingo Molnar
  2008-04-29 22:31                       ` David Miller
  2008-04-29 16:51                     ` Thomas Gleixner
  1 sibling, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2008-04-29 14:20 UTC (permalink / raw)
  To: David Miller; +Cc: tglx, torvalds, harvey.harrison, akpm, linux-kernel


* David Miller <davem@davemloft.net> wrote:

> From: David Miller <davem@davemloft.net>
> Date: Tue, 29 Apr 2008 03:03:32 -0700 (PDT)
> 
> > From: Thomas Gleixner <tglx@linutronix.de>
> > Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST)
> > 
> > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> > 
> > Acked-by: David S. Miller <davem@davemloft.net>
> 
> Ironically, I just bisected sparc64 bootup failures to the following 
> changeset.  It's very late here, and I haven't looked into the 
> details, but it seems to wedge in free_area_init_nodes() on a non-NUMA 
> system with CONFIG_NUMA disabled.

hm. I guess the next thing you plan to check is whether Thomas's patch 
fixes it?

> Isn't it funny that this optimization not only was useless, but also 
> broke things. :-/

yeah and sorry :-/ It would still be nice to understand why this 
happened, it looks like a weird (and unexpected) interaction.

	Ingo

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: remove "optimizations"
  2008-04-29 12:34                   ` David Miller
  2008-04-29 14:20                     ` Ingo Molnar
@ 2008-04-29 16:51                     ` Thomas Gleixner
  2008-04-29 22:58                       ` David Miller
  1 sibling, 1 reply; 27+ messages in thread
From: Thomas Gleixner @ 2008-04-29 16:51 UTC (permalink / raw)
  To: David Miller; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel

On Tue, 29 Apr 2008, David Miller wrote:
> From: David Miller <davem@davemloft.net>
> Date: Tue, 29 Apr 2008 03:03:32 -0700 (PDT)
> 
> > From: Thomas Gleixner <tglx@linutronix.de>
> > Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST)
> > 
> > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> > 
> > Acked-by: David S. Miller <davem@davemloft.net>
> 
> Ironically, I just bisected sparc64 bootup failures to the following
> changeset.  It's very late here, and I haven't looked into the
> details, but it seems to wedge in free_area_init_nodes() on a non-NUMA
> system with CONFIG_NUMA disabled.
> 
> Isn't it funny that this optimization not only was useless, but also
> broke things. :-/

The question is, whether it broke things or just unearthed some bug
hidden elsewhere.

Thanks,
	tglx

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: remove "optimizations"
  2008-04-29 14:20                     ` Ingo Molnar
@ 2008-04-29 22:31                       ` David Miller
  0 siblings, 0 replies; 27+ messages in thread
From: David Miller @ 2008-04-29 22:31 UTC (permalink / raw)
  To: mingo; +Cc: tglx, torvalds, harvey.harrison, akpm, linux-kernel

From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 29 Apr 2008 16:20:39 +0200

> 
> * David Miller <davem@davemloft.net> wrote:
> 
> > Ironically, I just bisected sparc64 bootup failures to the following 
> > changeset.  It's very late here, and I haven't looked into the 
> > details, but it seems to wedge in free_area_init_nodes() on a non-NUMA 
> > system with CONFIG_NUMA disabled.
> 
> hm. I guess the next thing you plan to check is whether Thomas's patch 
> fixes it?

Yes, I just needed some sleep first, which I just obtained :-)

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: remove "optimizations"
  2008-04-29 16:51                     ` Thomas Gleixner
@ 2008-04-29 22:58                       ` David Miller
  2008-04-29 23:30                         ` David Miller
  0 siblings, 1 reply; 27+ messages in thread
From: David Miller @ 2008-04-29 22:58 UTC (permalink / raw)
  To: tglx; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel

From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 Apr 2008 18:51:51 +0200 (CEST)

> The question is, whether it broke things or just unearthed some bug
> hidden elsewhere.

So I did a quick test, just #if 0'ing out the optimization inline
portions of the find_first_bit() code in linux/bitops.h, and forcing
it to always unconditionally call __find_first_bit() fixes the
regression.

Given that others who tested could not find one case where the
optimization cases actually applied, and it's breaking things for me,
my theory is that it's triggering for some obscure case on sparc64 and
thus showing a bug in these optimizations since in practice I'm the
only person to actually test this new code.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] bitops: remove "optimizations"
  2008-04-29 22:58                       ` David Miller
@ 2008-04-29 23:30                         ` David Miller
  0 siblings, 0 replies; 27+ messages in thread
From: David Miller @ 2008-04-29 23:30 UTC (permalink / raw)
  To: tglx; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel

From: David Miller <davem@davemloft.net>
Date: Tue, 29 Apr 2008 15:58:24 -0700 (PDT)

> Given that others who tested could not find one case where the
> optimization cases actually applied, and it's breaking things for me,
> my theory is that it's triggering for some obscure case on sparc64 and
> thus showing a bug in these optimizations since in practice I'm the
> only person to actually test this new code.

Ok, I think I see the problem.

The core issue is that (X << N) is undefined when N is >= the
word size, but that's exactly what the find_next_bit() inline
optimizations do.

This optimization code will trigger on 64-bit if NR_CPUS is set
to 64, and you actually have 64 cpus.  It should also occur
on 32-bit if NR_CPUS=32 and you have 32 cpus.

The bogus expansion occurs in lib/cpumask.c:__next_cpu()

After processing cpu 63, we'll use offset==64 and thus try
to make the undefined shift I described above, causing the
caller's cpumask iteration loop to run forever.

This code was really not tested very well at all.

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2008-04-29 23:30 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-27 20:19 [PATCH] bitops: simplify generic bit finding functions Harvey Harrison
2008-04-27 20:26 ` Linus Torvalds
2008-04-27 20:29   ` Harvey Harrison
2008-04-27 20:36     ` Al Viro
2008-04-27 20:38       ` Harvey Harrison
2008-04-27 20:38     ` Linus Torvalds
2008-04-27 21:02       ` Al Viro
2008-04-28 14:04   ` Thomas Gleixner
2008-04-28 15:10     ` Alexander van Heukelum
2008-04-28 15:58       ` Thomas Gleixner
2008-04-28 16:25     ` Linus Torvalds
2008-04-28 16:47       ` Thomas Gleixner
2008-04-28 16:54         ` Linus Torvalds
2008-04-28 19:26           ` Thomas Gleixner
2008-04-28 19:37             ` Linus Torvalds
2008-04-28 21:55               ` Ingo Molnar
2008-04-29 10:01               ` [PATCH] bitops: remove "optimizations" Thomas Gleixner
2008-04-29 10:03                 ` David Miller
2008-04-29 12:34                   ` David Miller
2008-04-29 14:20                     ` Ingo Molnar
2008-04-29 22:31                       ` David Miller
2008-04-29 16:51                     ` Thomas Gleixner
2008-04-29 22:58                       ` David Miller
2008-04-29 23:30                         ` David Miller
2008-04-28 19:57           ` [PATCH] bitops: simplify generic bit finding functions Andi Kleen
2008-04-28 14:58   ` Alexander van Heukelum
2008-04-28 14:32 ` Alexander van Heukelum

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox