* [PATCH] bitops: simplify generic bit finding functions @ 2008-04-27 20:19 Harvey Harrison 2008-04-27 20:26 ` Linus Torvalds 2008-04-28 14:32 ` Alexander van Heukelum 0 siblings, 2 replies; 27+ messages in thread From: Harvey Harrison @ 2008-04-27 20:19 UTC (permalink / raw) To: Ingo Molnar, Linus Torvalds; +Cc: Andrew Morton, LKML No need for a sentinal if we explicitly catch the no bits/all bits set cases, make it clear they are special cases returning size/BITS_PER_LONG. Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com> --- include/linux/bitops.h | 93 ++++++++++++++++++++---------------------------- 1 files changed, 39 insertions(+), 54 deletions(-) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 48bde60..d9eb58a 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -127,18 +127,17 @@ extern unsigned long __find_first_bit(const unsigned long *addr, static __always_inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) - return __ffs((*addr) | (1ul << size)); - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) - return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr); + /* + * Avoid a function call if the bitmap size is a constant and not + * bigger than BITS_PER_LONG. Ensure we return size if there are + * no set bits. + */ + if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) { + if (*addr == 0) + return (size < BITS_PER_LONG) ? size : BITS_PER_LONG; + else + return __ffs(*addr); + } /* size is not constant or too big */ return __find_first_bit(addr, size); @@ -157,20 +156,17 @@ extern unsigned long __find_first_zero_bit(const unsigned long *addr, static __always_inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - return __ffs(~(*addr) | (1ul << size)); + /* + * Avoid a function call if the bitmap size is a constant and not + * bigger than BITS_PER_LONG. Ensure we return size if all bits set. + */ + if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) { + if ((~(*addr)) == 0) + return (size < BITS_PER_LONG) ? size : BITS_PER_LONG; + else + return __ffs(~(*addr)); } - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) - return (~(*addr) == 0) ? BITS_PER_LONG : __ffs(~(*addr)); - /* size is not constant or too big */ return __find_first_zero_bit(addr, size); } @@ -192,22 +188,17 @@ find_next_bit(const unsigned long *addr, unsigned long size, { unsigned long value; - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { + /* + * Avoid a function call if the bitmap size is a constant and not + * bigger than BITS_PER_LONG. Ensure we return size if there are + * no set bits. + */ + if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) { value = (*addr) & ((~0ul) << offset); - value |= (1ul << size); - return __ffs(value); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { - value = (*addr) & ((~0ul) << offset); - return (value == 0) ? BITS_PER_LONG : __ffs(value); + if (value == 0) + return (size < BITS_PER_LONG) ? size : BITS_PER_LONG; + else + return __ffs(value); } /* size is not constant or too big */ @@ -229,22 +220,16 @@ find_next_zero_bit(const unsigned long *addr, unsigned long size, { unsigned long value; - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - value = (~(*addr)) & ((~0ul) << offset); - value |= (1ul << size); - return __ffs(value); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { + /* + * Avoid a function call if the bitmap size is a constant and not + * bigger than BITS_PER_LONG. Ensure we return size if all bits set. + */ + if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) { value = (~(*addr)) & ((~0ul) << offset); - return (value == 0) ? BITS_PER_LONG : __ffs(value); + if (value == 0) + return (size < BITS_PER_LONG) ? size : BITS_PER_LONG; + else + return __ffs(value); } /* size is not constant or too big */ -- 1.5.5.1.270.g89765 ^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-27 20:19 [PATCH] bitops: simplify generic bit finding functions Harvey Harrison @ 2008-04-27 20:26 ` Linus Torvalds 2008-04-27 20:29 ` Harvey Harrison ` (2 more replies) 2008-04-28 14:32 ` Alexander van Heukelum 1 sibling, 3 replies; 27+ messages in thread From: Linus Torvalds @ 2008-04-27 20:26 UTC (permalink / raw) To: Harvey Harrison; +Cc: Ingo Molnar, Andrew Morton, LKML On Sun, 27 Apr 2008, Harvey Harrison wrote: > > No need for a sentinal if we explicitly catch the no bits/all bits set > cases, make it clear they are special cases returning size/BITS_PER_LONG. These things need to be re-done anyway. David already complained that the thing inlines to something much much too big. Let's just make it not be an inline at all. Linus ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-27 20:26 ` Linus Torvalds @ 2008-04-27 20:29 ` Harvey Harrison 2008-04-27 20:36 ` Al Viro 2008-04-27 20:38 ` Linus Torvalds 2008-04-28 14:04 ` Thomas Gleixner 2008-04-28 14:58 ` Alexander van Heukelum 2 siblings, 2 replies; 27+ messages in thread From: Harvey Harrison @ 2008-04-27 20:29 UTC (permalink / raw) To: Linus Torvalds; +Cc: Ingo Molnar, Andrew Morton, LKML On Sun, 2008-04-27 at 13:26 -0700, Linus Torvalds wrote: > > On Sun, 27 Apr 2008, Harvey Harrison wrote: > > > > No need for a sentinal if we explicitly catch the no bits/all bits set > > cases, make it clear they are special cases returning size/BITS_PER_LONG. > > These things need to be re-done anyway. David already complained that the > thing inlines to something much much too big. > > Let's just make it not be an inline at all. Oh, I didn't realize, I only did this because sparse started spewing out lots of: include/linux/bitops.h:166:32: warning: shift too big (65536) for type unsigned long due to shift by size there, and again on line 202...I just wanted something that sparse wouldn't warn about and was a little easier to understand to boot. Cheers, Harvey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-27 20:29 ` Harvey Harrison @ 2008-04-27 20:36 ` Al Viro 2008-04-27 20:38 ` Harvey Harrison 2008-04-27 20:38 ` Linus Torvalds 1 sibling, 1 reply; 27+ messages in thread From: Al Viro @ 2008-04-27 20:36 UTC (permalink / raw) To: Harvey Harrison Cc: Linus Torvalds, Ingo Molnar, Andrew Morton, LKML, linux-sparse On Sun, Apr 27, 2008 at 01:29:21PM -0700, Harvey Harrison wrote: > Oh, I didn't realize, I only did this because sparse started spewing out > lots of: > include/linux/bitops.h:166:32: warning: shift too big (65536) for type unsigned long > > due to shift by size there, and again on line 202...I just wanted something > that sparse wouldn't warn about and was a little easier to understand to boot. That's a sparse problem, really. I wonder if we simply should introduce a new node type: EXPR_WARN. So that expand would generate those from things like division by zero/overflow/bad shift *and* emitting an insn for those would generate a stored warning. Objections? ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-27 20:36 ` Al Viro @ 2008-04-27 20:38 ` Harvey Harrison 0 siblings, 0 replies; 27+ messages in thread From: Harvey Harrison @ 2008-04-27 20:38 UTC (permalink / raw) To: Al Viro; +Cc: Linus Torvalds, Ingo Molnar, Andrew Morton, LKML, linux-sparse On Sun, 2008-04-27 at 21:36 +0100, Al Viro wrote: > On Sun, Apr 27, 2008 at 01:29:21PM -0700, Harvey Harrison wrote: > > > Oh, I didn't realize, I only did this because sparse started spewing out > > lots of: > > include/linux/bitops.h:166:32: warning: shift too big (65536) for type unsigned long > > > > due to shift by size there, and again on line 202...I just wanted something > > that sparse wouldn't warn about and was a little easier to understand to boot. > > That's a sparse problem, really. I wonder if we simply should introduce a > new node type: EXPR_WARN. So that expand would generate those from things > like division by zero/overflow/bad shift *and* emitting an insn for those > would generate a stored warning. Well, even though it is a sparse problem, I think my revised version was cleaner code anyway. > > Objections? None here Harvey ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-27 20:29 ` Harvey Harrison 2008-04-27 20:36 ` Al Viro @ 2008-04-27 20:38 ` Linus Torvalds 2008-04-27 21:02 ` Al Viro 1 sibling, 1 reply; 27+ messages in thread From: Linus Torvalds @ 2008-04-27 20:38 UTC (permalink / raw) To: Harvey Harrison; +Cc: Ingo Molnar, Andrew Morton, LKML On Sun, 27 Apr 2008, Harvey Harrison wrote: > > Oh, I didn't realize, I only did this because sparse started spewing out > lots of: > include/linux/bitops.h:166:32: warning: shift too big (65536) for type unsigned long Well, that's really just a sparse bug/misfeature that didn't matter before. It happens because the warning is done as part of constant expression evaluation, but then the expression isn't actually *used*, so when we optimize it away - at a later date - it's too late to undo the warning. I don't rightly know how to fix it. We do want to do the constant evaluation early (since all the optimizations that may then make it a non-issue depends on constants being constants!), but in order to not output the warning we'd have to turn the constant into a "constant with warning 'xyz' if used". Which we have no support for in sparse yet. Linus ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-27 20:38 ` Linus Torvalds @ 2008-04-27 21:02 ` Al Viro 0 siblings, 0 replies; 27+ messages in thread From: Al Viro @ 2008-04-27 21:02 UTC (permalink / raw) To: Linus Torvalds; +Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML On Sun, Apr 27, 2008 at 01:38:34PM -0700, Linus Torvalds wrote: > It happens because the warning is done as part of constant expression > evaluation, but then the expression isn't actually *used*, so when we > optimize it away - at a later date - it's too late to undo the warning. > > I don't rightly know how to fix it. We do want to do the constant > evaluation early (since all the optimizations that may then make it a > non-issue depends on constants being constants!), but in order to not > output the warning we'd have to turn the constant into a "constant with > warning 'xyz' if used". > > Which we have no support for in sparse yet. It's not even a constant, really... What we need is * don't fold further if that sucker would be evaluated (i.e. 0 && <...> is folded, 0 & <...> is not) * don't consider an integer constant expression with value 0 for purposes of null pointer constant handling * emit corresponding insn in linearize_expression() when we run into such sucker. * somewhere around the call of vrfy_flow() walk through insns in remaining bb (we'd done dead code elimination already) and spew postponed warning on each such insn. Probably. Assuming that I'm not entirely confused about what's going on in that area - which is quite possible. Folks, could somebody familiar with the backend comment on the last part? ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-27 20:26 ` Linus Torvalds 2008-04-27 20:29 ` Harvey Harrison @ 2008-04-28 14:04 ` Thomas Gleixner 2008-04-28 15:10 ` Alexander van Heukelum 2008-04-28 16:25 ` Linus Torvalds 2008-04-28 14:58 ` Alexander van Heukelum 2 siblings, 2 replies; 27+ messages in thread From: Thomas Gleixner @ 2008-04-28 14:04 UTC (permalink / raw) To: Linus Torvalds Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller On Sun, 27 Apr 2008, Linus Torvalds wrote: > On Sun, 27 Apr 2008, Harvey Harrison wrote: > > > > No need for a sentinal if we explicitly catch the no bits/all bits set > > cases, make it clear they are special cases returning size/BITS_PER_LONG. > > These things need to be re-done anyway. David already complained that the > thing inlines to something much much too big. The delta between the inlined version including the constant optimizations and the original non inlined version is exactly 1400 bytes on a sparc64 defconfig build. > Let's just make it not be an inline at all. Which makes the whole constant optimization moot. How about making the optimization controlled by a config switch so arch maintainers can decide whether they want to enable the constant optimization or unconditionally call the lib function ? See patch below. It gives back the 1400 bytes on SPARC64 and other platforms that have no instruction for find bit. Thanks, tglx ----------> Subject: bitops: optional bitops mapsize optimization on config switch From: Thomas Gleixner <tglx@linutronix.de> Date: Mon, 28 Apr 2008 00:22:06 +0200 The mapsize optimizations which were moved from x86 to the generic code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the binary size on non x86 architectures. Make the optimization depend on a config switch so architecture maintainers can decide. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/Kconfig.cpu | 1 + include/linux/bitops.h | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) Index: linux-2.6/arch/x86/Kconfig.cpu =================================================================== --- linux-2.6.orig/arch/x86/Kconfig.cpu +++ linux-2.6/arch/x86/Kconfig.cpu @@ -282,6 +282,7 @@ config X86_CPU def_bool y select GENERIC_FIND_FIRST_BIT select GENERIC_FIND_NEXT_BIT + select GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE config X86_GENERIC bool "Generic x86 support" Index: linux-2.6/include/linux/bitops.h =================================================================== --- linux-2.6.orig/include/linux/bitops.h +++ linux-2.6/include/linux/bitops.h @@ -190,6 +190,7 @@ static __always_inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { +#ifdef CONFIG_GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE unsigned long value; /* Avoid a function call if the bitmap size is a constant */ @@ -209,7 +210,7 @@ find_next_bit(const unsigned long *addr, value = (*addr) & ((~0ul) << offset); return (value == 0) ? BITS_PER_LONG : __ffs(value); } - +#endif /* size is not constant or too big */ return __find_next_bit(addr, size, offset); } @@ -227,6 +228,7 @@ static __always_inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { +#ifdef CONFIG_GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE unsigned long value; /* Avoid a function call if the bitmap size is a constant */ @@ -246,7 +248,7 @@ find_next_zero_bit(const unsigned long * value = (~(*addr)) & ((~0ul) << offset); return (value == 0) ? BITS_PER_LONG : __ffs(value); } - +#endif /* size is not constant or too big */ return __find_next_zero_bit(addr, size, offset); } ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-28 14:04 ` Thomas Gleixner @ 2008-04-28 15:10 ` Alexander van Heukelum 2008-04-28 15:58 ` Thomas Gleixner 2008-04-28 16:25 ` Linus Torvalds 1 sibling, 1 reply; 27+ messages in thread From: Alexander van Heukelum @ 2008-04-28 15:10 UTC (permalink / raw) To: Thomas Gleixner, Linus Torvalds Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller > See patch below. It gives back the 1400 bytes on SPARC64 and other > platforms that have no instruction for find bit. Hi, I, personally, see this patch as a stopgap measure. The real problem is that the generic __ffs is inlined and ends up generating a lot of instructions. Until the generic implementation of __ffs is fixed not to be an enormous inline function, this seems to be a reasonable thing to do, though it's a shame that the optimization is now default- off for architectures with a good implementation of __ffs. Greetings, Alexander > Thanks, > tglx > > ----------> > > Subject: bitops: optional bitops mapsize optimization on config switch > From: Thomas Gleixner <tglx@linutronix.de> > Date: Mon, 28 Apr 2008 00:22:06 +0200 > > The mapsize optimizations which were moved from x86 to the generic > code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the > binary size on non x86 architectures. > > Make the optimization depend on a config switch so architecture > maintainers can decide. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Acked-by: Ingo Molnar <mingo@elte.hu> > > --- > arch/x86/Kconfig.cpu | 1 + > include/linux/bitops.h | 6 ++++-- > 2 files changed, 5 insertions(+), 2 deletions(-) > > Index: linux-2.6/arch/x86/Kconfig.cpu > =================================================================== > --- linux-2.6.orig/arch/x86/Kconfig.cpu > +++ linux-2.6/arch/x86/Kconfig.cpu > @@ -282,6 +282,7 @@ config X86_CPU > def_bool y > select GENERIC_FIND_FIRST_BIT > select GENERIC_FIND_NEXT_BIT > + select GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE > > config X86_GENERIC > bool "Generic x86 support" > Index: linux-2.6/include/linux/bitops.h > =================================================================== > --- linux-2.6.orig/include/linux/bitops.h > +++ linux-2.6/include/linux/bitops.h > @@ -190,6 +190,7 @@ static __always_inline unsigned long > find_next_bit(const unsigned long *addr, unsigned long size, > unsigned long offset) > { > +#ifdef CONFIG_GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE > unsigned long value; > > /* Avoid a function call if the bitmap size is a constant */ > @@ -209,7 +210,7 @@ find_next_bit(const unsigned long *addr, > value = (*addr) & ((~0ul) << offset); > return (value == 0) ? BITS_PER_LONG : __ffs(value); > } > - > +#endif > /* size is not constant or too big */ > return __find_next_bit(addr, size, offset); > } > @@ -227,6 +228,7 @@ static __always_inline unsigned long > find_next_zero_bit(const unsigned long *addr, unsigned long size, > unsigned long offset) > { > +#ifdef CONFIG_GENERIC_FIND_BIT_MAPSIZE_OPTIMIZE > unsigned long value; > > /* Avoid a function call if the bitmap size is a constant */ > @@ -246,7 +248,7 @@ find_next_zero_bit(const unsigned long * > value = (~(*addr)) & ((~0ul) << offset); > return (value == 0) ? BITS_PER_LONG : __ffs(value); > } > - > +#endif > /* size is not constant or too big */ > return __find_next_zero_bit(addr, size, offset); > } > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" > in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > > -- Alexander van Heukelum heukelum@fastmail.fm -- http://www.fastmail.fm - mmm... Fastmail... ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-28 15:10 ` Alexander van Heukelum @ 2008-04-28 15:58 ` Thomas Gleixner 0 siblings, 0 replies; 27+ messages in thread From: Thomas Gleixner @ 2008-04-28 15:58 UTC (permalink / raw) To: Alexander van Heukelum Cc: Linus Torvalds, Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller On Mon, 28 Apr 2008, Alexander van Heukelum wrote: > > See patch below. It gives back the 1400 bytes on SPARC64 and other > > platforms that have no instruction for find bit. > > Hi, > > I, personally, see this patch as a stopgap measure. The real problem > is that the generic __ffs is inlined and ends up generating a lot > of instructions. > > Until the generic implementation of __ffs is fixed not to be an > enormous inline function, this seems to be a reasonable thing > to do, though it's a shame that the optimization is now default- > off for architectures with a good implementation of __ffs. Yup, it hasn't been there before and we explicitely enable it on x86. If there are other archs which have a good one it's a nobrainer to enable it. Thanks, tglx ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-28 14:04 ` Thomas Gleixner 2008-04-28 15:10 ` Alexander van Heukelum @ 2008-04-28 16:25 ` Linus Torvalds 2008-04-28 16:47 ` Thomas Gleixner 1 sibling, 1 reply; 27+ messages in thread From: Linus Torvalds @ 2008-04-28 16:25 UTC (permalink / raw) To: Thomas Gleixner Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller On Mon, 28 Apr 2008, Thomas Gleixner wrote: > > How about making the optimization controlled by a config switch so > arch maintainers can decide whether they want to enable the constant > optimization or unconditionally call the lib function ? No. This is just making that damn header line look worse and worse. Is there a _reason_ to optimize this stupid function this way? Linus ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-28 16:25 ` Linus Torvalds @ 2008-04-28 16:47 ` Thomas Gleixner 2008-04-28 16:54 ` Linus Torvalds 0 siblings, 1 reply; 27+ messages in thread From: Thomas Gleixner @ 2008-04-28 16:47 UTC (permalink / raw) To: Linus Torvalds Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller On Mon, 28 Apr 2008, Linus Torvalds wrote: > > > > How about making the optimization controlled by a config switch so > > arch maintainers can decide whether they want to enable the constant > > optimization or unconditionally call the lib function ? > > No. > > This is just making that damn header line look worse and worse. > > Is there a _reason_ to optimize this stupid function this way? On architectures which have a find bit instruction we can replace the call to the library function by a single instruction when the size of the bitmap is less/equal bits per long and when the bitnr is a constant. Thanks, tglx ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-28 16:47 ` Thomas Gleixner @ 2008-04-28 16:54 ` Linus Torvalds 2008-04-28 19:26 ` Thomas Gleixner 2008-04-28 19:57 ` [PATCH] bitops: simplify generic bit finding functions Andi Kleen 0 siblings, 2 replies; 27+ messages in thread From: Linus Torvalds @ 2008-04-28 16:54 UTC (permalink / raw) To: Thomas Gleixner Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller On Mon, 28 Apr 2008, Thomas Gleixner wrote: > On Mon, 28 Apr 2008, Linus Torvalds wrote: > > > > > > How about making the optimization controlled by a config switch so > > > arch maintainers can decide whether they want to enable the constant > > > optimization or unconditionally call the lib function ? > > > > No. > > > > This is just making that damn header line look worse and worse. > > > > Is there a _reason_ to optimize this stupid function this way? > > On architectures which have a find bit instruction we can replace the > call to the library function by a single instruction when the size of > the bitmap is less/equal bits per long and when the bitnr is a > constant. That's not what I asked. I asked whether there is a *reason* to optimize this and cause all these stupid problems. Is there a real hot path anywhere that actually uses this and depends on it? Linus ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-28 16:54 ` Linus Torvalds @ 2008-04-28 19:26 ` Thomas Gleixner 2008-04-28 19:37 ` Linus Torvalds 2008-04-28 19:57 ` [PATCH] bitops: simplify generic bit finding functions Andi Kleen 1 sibling, 1 reply; 27+ messages in thread From: Thomas Gleixner @ 2008-04-28 19:26 UTC (permalink / raw) To: Linus Torvalds Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller On Mon, 28 Apr 2008, Linus Torvalds wrote: > > > > How about making the optimization controlled by a config switch so > > > > arch maintainers can decide whether they want to enable the constant > > > > optimization or unconditionally call the lib function ? > > > > > > No. > > > > > > This is just making that damn header line look worse and worse. > > > > > > Is there a _reason_ to optimize this stupid function this way? > > > > On architectures which have a find bit instruction we can replace the > > call to the library function by a single instruction when the size of > > the bitmap is less/equal bits per long and when the bitnr is a > > constant. > > That's not what I asked. > > I asked whether there is a *reason* to optimize this and cause all these > stupid problems. > > Is there a real hot path anywhere that actually uses this and depends on > it? Sorry, misunderstood your question. I checked the use cases and have not found a single one for find_next_(zero_)bit() which makes use of this micro optimization. In fact the .text section of vmlinux of the "optimized" and the straight function call are identical. So the effect of those micro optimizations is exactly zero. find_first_(zero_)bit() has a couple of places where the optimization hits, but the code size reduction is mere 21 bytes and the use cases are not in real hot pathes AFAICT. I doubt that that is worth the trouble and we should just remove those inlines alltogether. This was discussed before, but Andi objected to remove those micro optimizations and nobody had time to actually verify the real benefits. I'll whip up a patch. Thanks, tglx ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-28 19:26 ` Thomas Gleixner @ 2008-04-28 19:37 ` Linus Torvalds 2008-04-28 21:55 ` Ingo Molnar 2008-04-29 10:01 ` [PATCH] bitops: remove "optimizations" Thomas Gleixner 0 siblings, 2 replies; 27+ messages in thread From: Linus Torvalds @ 2008-04-28 19:37 UTC (permalink / raw) To: Thomas Gleixner Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller On Mon, 28 Apr 2008, Thomas Gleixner wrote: > > I doubt that that is worth the trouble and we should just remove those > inlines alltogether. This was discussed before, but Andi objected to > remove those micro optimizations and nobody had time to actually > verify the real benefits. Thanks for checking. In general, I think we should act the other way: we should *assume* that inlines and complex macros in header files are not worth it and we'd be better off with just plain function calls, unless it's really obvious that the inline function is always worth it. We have tons of good examples of inline functions in headers that really *really* want to be that way: <linux/list.h> is generally a collection of things that clearly expand to somthing that is (a) easily critical and (b) inlining things generally really causes a smaller code footprint than having a "call" instruction and all the argument setup - regardless of how many times it is done. So we do have cases where the inlines are obviously worth it. But in general, I think we should try to move things from the header files into *.c files unless there is a really clear reason for keeping it that way. (Some inline functions do depend on things like constant arguments goign away, with kmalloc() being a good example of some really nasty header file tricks that are likely worth it despite their extreme ugliness) Linus ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-28 19:37 ` Linus Torvalds @ 2008-04-28 21:55 ` Ingo Molnar 2008-04-29 10:01 ` [PATCH] bitops: remove "optimizations" Thomas Gleixner 1 sibling, 0 replies; 27+ messages in thread From: Ingo Molnar @ 2008-04-28 21:55 UTC (permalink / raw) To: Linus Torvalds Cc: Thomas Gleixner, Harvey Harrison, Andrew Morton, LKML, David Miller * Linus Torvalds <torvalds@linux-foundation.org> wrote: > So we do have cases where the inlines are obviously worth it. But in > general, I think we should try to move things from the header files > into *.c files unless there is a really clear reason for keeping it > that way. there's another benefit, and in asm-x86 we prefer to move inlines to .c files even in borderline cases because it simplifies the type dependencies: not having to fully define all types at the function prototype site avoids include file dependency hell. Putting things like a task struct dereference into a lowlevel inline file easily causes dependency problems that causes people to use macros instead - which have their own set of readability and side-effect problems. a third argument is that inlines seldom get smaller. So if they are borderline and we move them into a .c, and later on the function gets larger, no harm is done. But if we keep the inline in a .h in the borderline case and we grow the inline later on, the whole kernel bloats in a multiplied way, without any apparent direct feedback to the developer that something wrong and harmful just happened. Ingo ^ permalink raw reply [flat|nested] 27+ messages in thread
* [PATCH] bitops: remove "optimizations" 2008-04-28 19:37 ` Linus Torvalds 2008-04-28 21:55 ` Ingo Molnar @ 2008-04-29 10:01 ` Thomas Gleixner 2008-04-29 10:03 ` David Miller 1 sibling, 1 reply; 27+ messages in thread From: Thomas Gleixner @ 2008-04-29 10:01 UTC (permalink / raw) To: Linus Torvalds Cc: Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller The mapsize optimizations which were moved from x86 to the generic code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the binary size on non x86 architectures. Looking into the real effects of the "optimizations" it turned out that they are not used in find_next_bit() and find_next_zero_bit(). The ones in find_first_bit() and find_first_zero_bit() are used in a couple of places but none of them is a real hot path. Remove the "optimizations" all together and call the library functions unconditionally. Boot-tested on x86 and compile tested on every cross compiler I have. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- include/linux/bitops.h | 115 +++++-------------------------------------------- lib/find_next_bit.c | 22 ++++----- 2 files changed, 22 insertions(+), 115 deletions(-) Index: linux-2.6/include/linux/bitops.h =================================================================== --- linux-2.6.orig/include/linux/bitops.h +++ linux-2.6/include/linux/bitops.h @@ -114,8 +114,6 @@ static inline unsigned fls_long(unsigned #ifdef __KERNEL__ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT -extern unsigned long __find_first_bit(const unsigned long *addr, - unsigned long size); /** * find_first_bit - find the first set bit in a memory region @@ -124,28 +122,8 @@ extern unsigned long __find_first_bit(co * * Returns the bit number of the first set bit. */ -static __always_inline unsigned long -find_first_bit(const unsigned long *addr, unsigned long size) -{ - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) - return __ffs((*addr) | (1ul << size)); - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) - return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr); - - /* size is not constant or too big */ - return __find_first_bit(addr, size); -} - -extern unsigned long __find_first_zero_bit(const unsigned long *addr, - unsigned long size); +extern unsigned long find_first_bit(const unsigned long *addr, + unsigned long size); /** * find_first_zero_bit - find the first cleared bit in a memory region @@ -154,31 +132,12 @@ extern unsigned long __find_first_zero_b * * Returns the bit number of the first cleared bit. */ -static __always_inline unsigned long -find_first_zero_bit(const unsigned long *addr, unsigned long size) -{ - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - return __ffs(~(*addr) | (1ul << size)); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) - return (~(*addr) == 0) ? BITS_PER_LONG : __ffs(~(*addr)); - - /* size is not constant or too big */ - return __find_first_zero_bit(addr, size); -} +extern unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size); + #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef CONFIG_GENERIC_FIND_NEXT_BIT -extern unsigned long __find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); /** * find_next_bit - find the next set bit in a memory region @@ -186,36 +145,8 @@ extern unsigned long __find_next_bit(con * @offset: The bitnumber to start searching at * @size: The bitmap size in bits */ -static __always_inline unsigned long -find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - unsigned long value; - - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - value = (*addr) & ((~0ul) << offset); - value |= (1ul << size); - return __ffs(value); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { - value = (*addr) & ((~0ul) << offset); - return (value == 0) ? BITS_PER_LONG : __ffs(value); - } - - /* size is not constant or too big */ - return __find_next_bit(addr, size, offset); -} - -extern unsigned long __find_next_zero_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); +extern unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -223,33 +154,11 @@ extern unsigned long __find_next_zero_bi * @offset: The bitnumber to start searching at * @size: The bitmap size in bits */ -static __always_inline unsigned long -find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - unsigned long value; - - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - value = (~(*addr)) & ((~0ul) << offset); - value |= (1ul << size); - return __ffs(value); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { - value = (~(*addr)) & ((~0ul) << offset); - return (value == 0) ? BITS_PER_LONG : __ffs(value); - } - - /* size is not constant or too big */ - return __find_next_zero_bit(addr, size, offset); -} + +extern unsigned long find_next_zero_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ #endif /* __KERNEL__ */ #endif Index: linux-2.6/lib/find_next_bit.c =================================================================== --- linux-2.6.orig/lib/find_next_bit.c +++ linux-2.6/lib/find_next_bit.c @@ -20,8 +20,8 @@ /* * Find the next set bit in a memory region. */ -unsigned long __find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset) +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); @@ -58,14 +58,14 @@ found_first: found_middle: return result + __ffs(tmp); } -EXPORT_SYMBOL(__find_next_bit); +EXPORT_SYMBOL(find_next_bit); /* * This implementation of find_{first,next}_zero_bit was stolen from * Linus' asm-alpha/bitops.h. */ -unsigned long __find_next_zero_bit(const unsigned long *addr, - unsigned long size, unsigned long offset) +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); @@ -102,15 +102,14 @@ found_first: found_middle: return result + ffz(tmp); } -EXPORT_SYMBOL(__find_next_zero_bit); +EXPORT_SYMBOL(find_next_zero_bit); #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT /* * Find the first set bit in a memory region. */ -unsigned long __find_first_bit(const unsigned long *addr, - unsigned long size) +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { const unsigned long *p = addr; unsigned long result = 0; @@ -131,13 +130,12 @@ unsigned long __find_first_bit(const uns found: return result + __ffs(tmp); } -EXPORT_SYMBOL(__find_first_bit); +EXPORT_SYMBOL(find_first_bit); /* * Find the first cleared bit in a memory region. */ -unsigned long __find_first_zero_bit(const unsigned long *addr, - unsigned long size) +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { const unsigned long *p = addr; unsigned long result = 0; @@ -158,7 +156,7 @@ unsigned long __find_first_zero_bit(cons found: return result + ffz(tmp); } -EXPORT_SYMBOL(__find_first_zero_bit); +EXPORT_SYMBOL(find_first_zero_bit); #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef __BIG_ENDIAN ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: remove "optimizations" 2008-04-29 10:01 ` [PATCH] bitops: remove "optimizations" Thomas Gleixner @ 2008-04-29 10:03 ` David Miller 2008-04-29 12:34 ` David Miller 0 siblings, 1 reply; 27+ messages in thread From: David Miller @ 2008-04-29 10:03 UTC (permalink / raw) To: tglx; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel From: Thomas Gleixner <tglx@linutronix.de> Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST) > The mapsize optimizations which were moved from x86 to the generic > code in commit 64970b68d2b3ed32b964b0b30b1b98518fde388e increased the > binary size on non x86 architectures. > > Looking into the real effects of the "optimizations" it turned out > that they are not used in find_next_bit() and find_next_zero_bit(). > > The ones in find_first_bit() and find_first_zero_bit() are used in a > couple of places but none of them is a real hot path. > > Remove the "optimizations" all together and call the library functions > unconditionally. > > Boot-tested on x86 and compile tested on every cross compiler I have. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: David S. Miller <davem@davemloft.net> ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: remove "optimizations" 2008-04-29 10:03 ` David Miller @ 2008-04-29 12:34 ` David Miller 2008-04-29 14:20 ` Ingo Molnar 2008-04-29 16:51 ` Thomas Gleixner 0 siblings, 2 replies; 27+ messages in thread From: David Miller @ 2008-04-29 12:34 UTC (permalink / raw) To: tglx; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel From: David Miller <davem@davemloft.net> Date: Tue, 29 Apr 2008 03:03:32 -0700 (PDT) > From: Thomas Gleixner <tglx@linutronix.de> > Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST) > > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > > Acked-by: David S. Miller <davem@davemloft.net> Ironically, I just bisected sparc64 bootup failures to the following changeset. It's very late here, and I haven't looked into the details, but it seems to wedge in free_area_init_nodes() on a non-NUMA system with CONFIG_NUMA disabled. Isn't it funny that this optimization not only was useless, but also broke things. :-/ 64970b68d2b3ed32b964b0b30b1b98518fde388e is first bad commit commit 64970b68d2b3ed32b964b0b30b1b98518fde388e Author: Alexander van Heukelum <heukelum@mailshack.com> Date: Tue Mar 11 16:17:19 2008 +0100 x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps This moves an optimization for searching constant-sized small bitmaps form x86_64-specific to generic code. On an i386 defconfig (the x86#testing one), the size of vmlinux hardly changes with this applied. I have observed only four places where this optimization avoids a call into find_next_bit: In the functions return_unused_surplus_pages, alloc_fresh_huge_page, and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap. In __next_cpu a call is avoided for a 32-bit bitmap. That's it. On x86_64, 52 locations are optimized with a minimal increase in code size: Current #testing defconfig: 146 x bsf, 27 x find_next_*bit text data bss dec hex filename 5392637 846592 724424 6963653 6a41c5 vmlinux After removing the x86_64 specific optimization for find_next_*bit: 94 x bsf, 79 x find_next_*bit text data bss dec hex filename 5392358 846592 724424 6963374 6a40ae vmlinux After this patch (making the optimization generic): 146 x bsf, 27 x find_next_*bit text data bss dec hex filename 5392396 846592 724424 6963412 6a40d4 vmlinux [ tglx@linutronix.de: build fixes ] Signed-off-by: Ingo Molnar <mingo@elte.hu> :040000 040000 c0407f7df7dc5333c78c300780931a269ae0dedd 2f6ef634a35b6dd46e40ea70128c39a742e60501 M include :040000 040000 556ee6ccb15cdbb1aa5a690c732a5492d58dfe6f 937d05977f46056c12b5da64ca62959c06a912c0 M lib ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: remove "optimizations" 2008-04-29 12:34 ` David Miller @ 2008-04-29 14:20 ` Ingo Molnar 2008-04-29 22:31 ` David Miller 2008-04-29 16:51 ` Thomas Gleixner 1 sibling, 1 reply; 27+ messages in thread From: Ingo Molnar @ 2008-04-29 14:20 UTC (permalink / raw) To: David Miller; +Cc: tglx, torvalds, harvey.harrison, akpm, linux-kernel * David Miller <davem@davemloft.net> wrote: > From: David Miller <davem@davemloft.net> > Date: Tue, 29 Apr 2008 03:03:32 -0700 (PDT) > > > From: Thomas Gleixner <tglx@linutronix.de> > > Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST) > > > > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > > > > Acked-by: David S. Miller <davem@davemloft.net> > > Ironically, I just bisected sparc64 bootup failures to the following > changeset. It's very late here, and I haven't looked into the > details, but it seems to wedge in free_area_init_nodes() on a non-NUMA > system with CONFIG_NUMA disabled. hm. I guess the next thing you plan to check is whether Thomas's patch fixes it? > Isn't it funny that this optimization not only was useless, but also > broke things. :-/ yeah and sorry :-/ It would still be nice to understand why this happened, it looks like a weird (and unexpected) interaction. Ingo ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: remove "optimizations" 2008-04-29 14:20 ` Ingo Molnar @ 2008-04-29 22:31 ` David Miller 0 siblings, 0 replies; 27+ messages in thread From: David Miller @ 2008-04-29 22:31 UTC (permalink / raw) To: mingo; +Cc: tglx, torvalds, harvey.harrison, akpm, linux-kernel From: Ingo Molnar <mingo@elte.hu> Date: Tue, 29 Apr 2008 16:20:39 +0200 > > * David Miller <davem@davemloft.net> wrote: > > > Ironically, I just bisected sparc64 bootup failures to the following > > changeset. It's very late here, and I haven't looked into the > > details, but it seems to wedge in free_area_init_nodes() on a non-NUMA > > system with CONFIG_NUMA disabled. > > hm. I guess the next thing you plan to check is whether Thomas's patch > fixes it? Yes, I just needed some sleep first, which I just obtained :-) ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: remove "optimizations" 2008-04-29 12:34 ` David Miller 2008-04-29 14:20 ` Ingo Molnar @ 2008-04-29 16:51 ` Thomas Gleixner 2008-04-29 22:58 ` David Miller 1 sibling, 1 reply; 27+ messages in thread From: Thomas Gleixner @ 2008-04-29 16:51 UTC (permalink / raw) To: David Miller; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel On Tue, 29 Apr 2008, David Miller wrote: > From: David Miller <davem@davemloft.net> > Date: Tue, 29 Apr 2008 03:03:32 -0700 (PDT) > > > From: Thomas Gleixner <tglx@linutronix.de> > > Date: Tue, 29 Apr 2008 12:01:02 +0200 (CEST) > > > > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > > > > Acked-by: David S. Miller <davem@davemloft.net> > > Ironically, I just bisected sparc64 bootup failures to the following > changeset. It's very late here, and I haven't looked into the > details, but it seems to wedge in free_area_init_nodes() on a non-NUMA > system with CONFIG_NUMA disabled. > > Isn't it funny that this optimization not only was useless, but also > broke things. :-/ The question is, whether it broke things or just unearthed some bug hidden elsewhere. Thanks, tglx ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: remove "optimizations" 2008-04-29 16:51 ` Thomas Gleixner @ 2008-04-29 22:58 ` David Miller 2008-04-29 23:30 ` David Miller 0 siblings, 1 reply; 27+ messages in thread From: David Miller @ 2008-04-29 22:58 UTC (permalink / raw) To: tglx; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel From: Thomas Gleixner <tglx@linutronix.de> Date: Tue, 29 Apr 2008 18:51:51 +0200 (CEST) > The question is, whether it broke things or just unearthed some bug > hidden elsewhere. So I did a quick test, just #if 0'ing out the optimization inline portions of the find_first_bit() code in linux/bitops.h, and forcing it to always unconditionally call __find_first_bit() fixes the regression. Given that others who tested could not find one case where the optimization cases actually applied, and it's breaking things for me, my theory is that it's triggering for some obscure case on sparc64 and thus showing a bug in these optimizations since in practice I'm the only person to actually test this new code. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: remove "optimizations" 2008-04-29 22:58 ` David Miller @ 2008-04-29 23:30 ` David Miller 0 siblings, 0 replies; 27+ messages in thread From: David Miller @ 2008-04-29 23:30 UTC (permalink / raw) To: tglx; +Cc: torvalds, harvey.harrison, mingo, akpm, linux-kernel From: David Miller <davem@davemloft.net> Date: Tue, 29 Apr 2008 15:58:24 -0700 (PDT) > Given that others who tested could not find one case where the > optimization cases actually applied, and it's breaking things for me, > my theory is that it's triggering for some obscure case on sparc64 and > thus showing a bug in these optimizations since in practice I'm the > only person to actually test this new code. Ok, I think I see the problem. The core issue is that (X << N) is undefined when N is >= the word size, but that's exactly what the find_next_bit() inline optimizations do. This optimization code will trigger on 64-bit if NR_CPUS is set to 64, and you actually have 64 cpus. It should also occur on 32-bit if NR_CPUS=32 and you have 32 cpus. The bogus expansion occurs in lib/cpumask.c:__next_cpu() After processing cpu 63, we'll use offset==64 and thus try to make the undefined shift I described above, causing the caller's cpumask iteration loop to run forever. This code was really not tested very well at all. ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-28 16:54 ` Linus Torvalds 2008-04-28 19:26 ` Thomas Gleixner @ 2008-04-28 19:57 ` Andi Kleen 1 sibling, 0 replies; 27+ messages in thread From: Andi Kleen @ 2008-04-28 19:57 UTC (permalink / raw) To: Linus Torvalds Cc: Thomas Gleixner, Harvey Harrison, Ingo Molnar, Andrew Morton, LKML, David Miller Linus Torvalds <torvalds@linux-foundation.org> writes: > > Is there a real hot path anywhere that actually uses this and depends on > it? A long time ago during profiling I found that in the scheduler back then for_each_cpu() with find_next_bit() was somewhat hot. But I'm not sure it still would be in the new scheduler anyways and the test case a little dumb anyways (overscheduling user space) Still I think for_each_cpu() should be reasonably stream lined code. -Andi ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-27 20:26 ` Linus Torvalds 2008-04-27 20:29 ` Harvey Harrison 2008-04-28 14:04 ` Thomas Gleixner @ 2008-04-28 14:58 ` Alexander van Heukelum 2 siblings, 0 replies; 27+ messages in thread From: Alexander van Heukelum @ 2008-04-28 14:58 UTC (permalink / raw) To: Linus Torvalds, Harvey Harrison, David S. Miller Cc: Ingo Molnar, Andrew Morton, LKML On Sun, 27 Apr 2008 13:26:34 -0700 (PDT), "Linus Torvalds" <torvalds@linux-foundation.org> said: > On Sun, 27 Apr 2008, Harvey Harrison wrote: > > > > No need for a sentinal if we explicitly catch the no bits/all bits set > > cases, make it clear they are special cases returning size/BITS_PER_LONG. > > These things need to be re-done anyway. David already complained that the > thing inlines to something much much too big. > > Let's just make it not be an inline at all. Hello, I think that's the wrong way to go about it. The problem that David Miller pointed out was that the small-bitmap optimization caused an inliningdisaster. Apparently, the generic version of __ffs is very big on sparc64. In my opinion the implementation of __ffs should be made out of line. If you move the wrapper of find_first_bit etc out of line, all possibilities of optimization will be destroyed. Better to remove them completely, then. Greetings, Alexander > Linus > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" > in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > > -- Alexander van Heukelum heukelum@fastmail.fm -- http://www.fastmail.fm - Accessible with your email software or over the web ^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] bitops: simplify generic bit finding functions 2008-04-27 20:19 [PATCH] bitops: simplify generic bit finding functions Harvey Harrison 2008-04-27 20:26 ` Linus Torvalds @ 2008-04-28 14:32 ` Alexander van Heukelum 1 sibling, 0 replies; 27+ messages in thread From: Alexander van Heukelum @ 2008-04-28 14:32 UTC (permalink / raw) To: Harvey Harrison, Ingo Molnar, Linus Torvalds; +Cc: Andrew Morton, LKML Hi, Ingo just pointed me to this thread. On Sun, 27 Apr 2008 13:19:51 -0700, "Harvey Harrison" <harvey.harrison@gmail.com> said: > No need for a sentinal if we explicitly catch the no bits/all bits set > cases, make it clear they are special cases returning size/BITS_PER_LONG. > > Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com> > --- > include/linux/bitops.h | 93 > ++++++++++++++++++++---------------------------- > 1 files changed, 39 insertions(+), 54 deletions(-) > > diff --git a/include/linux/bitops.h b/include/linux/bitops.h > index 48bde60..d9eb58a 100644 > --- a/include/linux/bitops.h > +++ b/include/linux/bitops.h > @@ -127,18 +127,17 @@ extern unsigned long __find_first_bit(const > unsigned long *addr, > static __always_inline unsigned long > find_first_bit(const unsigned long *addr, unsigned long size) > { > - /* Avoid a function call if the bitmap size is a constant */ > - /* and not bigger than BITS_PER_LONG. */ > - > - /* insert a sentinel so that __ffs returns size if there */ > - /* are no set bits in the bitmap */ > - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) > - return __ffs((*addr) | (1ul << size)); > - > - /* the result of __ffs(0) is undefined, so it needs to be */ > - /* handled separately */ > - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) > - return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr); > + /* > + * Avoid a function call if the bitmap size is a constant and not > + * bigger than BITS_PER_LONG. Ensure we return size if there are > + * no set bits. > + */ > + if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) { > + if (*addr == 0) > + return (size < BITS_PER_LONG) ? size : BITS_PER_LONG; > + else > + return __ffs(*addr); > + } Ehm... assume size=16 and *addr=0x80000000 then __ffs(*addr) = 31. Not 16. > /* size is not constant or too big */ > return __find_first_bit(addr, size); > @@ -157,20 +156,17 @@ extern unsigned long __find_first_zero_bit(const > unsigned long *addr, > static __always_inline unsigned long > find_first_zero_bit(const unsigned long *addr, unsigned long size) > { > - /* Avoid a function call if the bitmap size is a constant */ > - /* and not bigger than BITS_PER_LONG. */ > - > - /* insert a sentinel so that __ffs returns size if there */ > - /* are no set bits in the bitmap */ > - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { > - return __ffs(~(*addr) | (1ul << size)); > + /* > + * Avoid a function call if the bitmap size is a constant and not > + * bigger than BITS_PER_LONG. Ensure we return size if all bits set. > + */ > + if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) { > + if ((~(*addr)) == 0) > + return (size < BITS_PER_LONG) ? size : BITS_PER_LONG; > + else > + return __ffs(~(*addr)); > } > > - /* the result of __ffs(0) is undefined, so it needs to be */ > - /* handled separately */ > - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) > - return (~(*addr) == 0) ? BITS_PER_LONG : __ffs(~(*addr)); > - > /* size is not constant or too big */ > return __find_first_zero_bit(addr, size); > } > @@ -192,22 +188,17 @@ find_next_bit(const unsigned long *addr, unsigned > long size, > { > unsigned long value; > > - /* Avoid a function call if the bitmap size is a constant */ > - /* and not bigger than BITS_PER_LONG. */ > - > - /* insert a sentinel so that __ffs returns size if there */ > - /* are no set bits in the bitmap */ > - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { > + /* > + * Avoid a function call if the bitmap size is a constant and not > + * bigger than BITS_PER_LONG. Ensure we return size if there are > + * no set bits. > + */ > + if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) { > value = (*addr) & ((~0ul) << offset); > - value |= (1ul << size); > - return __ffs(value); > - } > - > - /* the result of __ffs(0) is undefined, so it needs to be */ > - /* handled separately */ > - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { > - value = (*addr) & ((~0ul) << offset); > - return (value == 0) ? BITS_PER_LONG : __ffs(value); > + if (value == 0) > + return (size < BITS_PER_LONG) ? size : BITS_PER_LONG; > + else > + return __ffs(value); > } > > /* size is not constant or too big */ > @@ -229,22 +220,16 @@ find_next_zero_bit(const unsigned long *addr, > unsigned long size, > { > unsigned long value; > > - /* Avoid a function call if the bitmap size is a constant */ > - /* and not bigger than BITS_PER_LONG. */ > - > - /* insert a sentinel so that __ffs returns size if there */ > - /* are no set bits in the bitmap */ > - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { > - value = (~(*addr)) & ((~0ul) << offset); > - value |= (1ul << size); > - return __ffs(value); > - } > - > - /* the result of __ffs(0) is undefined, so it needs to be */ > - /* handled separately */ > - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { > + /* > + * Avoid a function call if the bitmap size is a constant and not > + * bigger than BITS_PER_LONG. Ensure we return size if all bits set. > + */ > + if (__builtin_constant_p(size) && (size <= BITS_PER_LONG)) { > value = (~(*addr)) & ((~0ul) << offset); > - return (value == 0) ? BITS_PER_LONG : __ffs(value); > + if (value == 0) > + return (size < BITS_PER_LONG) ? size : BITS_PER_LONG; > + else > + return __ffs(value); > } > > /* size is not constant or too big */ > -- > 1.5.5.1.270.g89765 -- Alexander van Heukelum heukelum@fastmail.fm -- http://www.fastmail.fm - mmm... Fastmail... ^ permalink raw reply [flat|nested] 27+ messages in thread
end of thread, other threads:[~2008-04-29 23:30 UTC | newest] Thread overview: 27+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-04-27 20:19 [PATCH] bitops: simplify generic bit finding functions Harvey Harrison 2008-04-27 20:26 ` Linus Torvalds 2008-04-27 20:29 ` Harvey Harrison 2008-04-27 20:36 ` Al Viro 2008-04-27 20:38 ` Harvey Harrison 2008-04-27 20:38 ` Linus Torvalds 2008-04-27 21:02 ` Al Viro 2008-04-28 14:04 ` Thomas Gleixner 2008-04-28 15:10 ` Alexander van Heukelum 2008-04-28 15:58 ` Thomas Gleixner 2008-04-28 16:25 ` Linus Torvalds 2008-04-28 16:47 ` Thomas Gleixner 2008-04-28 16:54 ` Linus Torvalds 2008-04-28 19:26 ` Thomas Gleixner 2008-04-28 19:37 ` Linus Torvalds 2008-04-28 21:55 ` Ingo Molnar 2008-04-29 10:01 ` [PATCH] bitops: remove "optimizations" Thomas Gleixner 2008-04-29 10:03 ` David Miller 2008-04-29 12:34 ` David Miller 2008-04-29 14:20 ` Ingo Molnar 2008-04-29 22:31 ` David Miller 2008-04-29 16:51 ` Thomas Gleixner 2008-04-29 22:58 ` David Miller 2008-04-29 23:30 ` David Miller 2008-04-28 19:57 ` [PATCH] bitops: simplify generic bit finding functions Andi Kleen 2008-04-28 14:58 ` Alexander van Heukelum 2008-04-28 14:32 ` Alexander van Heukelum
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox