linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case
@ 2012-02-20 22:39 David Howells
  2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
  2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
  0 siblings, 2 replies; 8+ messages in thread
From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann

Adjust the comment on get_order() to note that the result of passing a size of
0 results in an undefined value.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---

 include/asm-generic/getorder.h |   23 ++++++++++++++++++++++-
 1 files changed, 22 insertions(+), 1 deletions(-)

diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 67e7245..76e9687 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -5,7 +5,28 @@
 
 #include <linux/compiler.h>
 
-/* Pure 2^n version of get_order */
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory.  This
+ * is on a logarithmic scale, where:
+ *
+ *	0 -> 2^0 * PAGE_SIZE and below
+ *	1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ *	2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ *	3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ *	4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ *	...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ *
+ * This function may be used to initialise variables with compile time
+ * evaluations of constants.
+ */
 static inline __attribute_const__ int get_order(unsigned long size)
 {
 	int order;


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] Optimise get_order()
  2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
@ 2012-02-20 22:39 ` David Howells
  2012-02-20 23:20   ` [tip:x86/asm] bitops: " tip-bot for David Howells
  2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
  1 sibling, 1 reply; 8+ messages in thread
From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann

Optimise get_order() to use bit scanning instructions if such exist rather than
a loop.  Also, make it possible to use get_order() in static initialisations
too by building it on top of ilog2() in the constant parameter case.

This has been tested for i386 and x86_64 using the following userspace program,
and for FRV by making appropriate substitutions for fls() and fls64().  It will
abort if the case for get_order() deviates from the original except for the
order of 0, for which get_order() produces an undefined result.  This program
tests both dynamic and static parameters.

	#include <stdlib.h>
	#include <stdio.h>

	#ifdef __x86_64__
	#define BITS_PER_LONG 64
	#else
	#define BITS_PER_LONG 32
	#endif

	#define PAGE_SHIFT 12

	typedef unsigned long long __u64, u64;
	typedef unsigned int __u32, u32;
	#define noinline	__attribute__((noinline))

	static inline int fls(int x)
	{
		int bitpos = -1;

		asm("bsrl %1,%0"
		    : "+r" (bitpos)
		    : "rm" (x));
		return bitpos + 1;
	}

	static __always_inline int fls64(__u64 x)
	{
	#if BITS_PER_LONG == 64
		long bitpos = -1;

		asm("bsrq %1,%0"
		    : "+r" (bitpos)
		    : "rm" (x));
		return bitpos + 1;
	#else
		__u32 h = x >> 32, l = x;
		int bitpos = -1;

		asm("bsrl	%1,%0	\n"
		    "subl	%2,%0	\n"
		    "bsrl	%3,%0	\n"
		    : "+r" (bitpos)
		    : "rm" (l), "i"(32), "rm" (h));

		return bitpos + 33;
	#endif
	}

	static inline __attribute__((const))
	int __ilog2_u32(u32 n)
	{
		return fls(n) - 1;
	}

	static inline __attribute__((const))
	int __ilog2_u64(u64 n)
	{
		return fls64(n) - 1;
	}

	extern __attribute__((const, noreturn))
	int ____ilog2_NaN(void);

	#define ilog2(n)				\
	(						\
		__builtin_constant_p(n) ? (		\
			(n) < 1 ? ____ilog2_NaN() :	\
			(n) & (1ULL << 63) ? 63 :	\
			(n) & (1ULL << 62) ? 62 :	\
			(n) & (1ULL << 61) ? 61 :	\
			(n) & (1ULL << 60) ? 60 :	\
			(n) & (1ULL << 59) ? 59 :	\
			(n) & (1ULL << 58) ? 58 :	\
			(n) & (1ULL << 57) ? 57 :	\
			(n) & (1ULL << 56) ? 56 :	\
			(n) & (1ULL << 55) ? 55 :	\
			(n) & (1ULL << 54) ? 54 :	\
			(n) & (1ULL << 53) ? 53 :	\
			(n) & (1ULL << 52) ? 52 :	\
			(n) & (1ULL << 51) ? 51 :	\
			(n) & (1ULL << 50) ? 50 :	\
			(n) & (1ULL << 49) ? 49 :	\
			(n) & (1ULL << 48) ? 48 :	\
			(n) & (1ULL << 47) ? 47 :	\
			(n) & (1ULL << 46) ? 46 :	\
			(n) & (1ULL << 45) ? 45 :	\
			(n) & (1ULL << 44) ? 44 :	\
			(n) & (1ULL << 43) ? 43 :	\
			(n) & (1ULL << 42) ? 42 :	\
			(n) & (1ULL << 41) ? 41 :	\
			(n) & (1ULL << 40) ? 40 :	\
			(n) & (1ULL << 39) ? 39 :	\
			(n) & (1ULL << 38) ? 38 :	\
			(n) & (1ULL << 37) ? 37 :	\
			(n) & (1ULL << 36) ? 36 :	\
			(n) & (1ULL << 35) ? 35 :	\
			(n) & (1ULL << 34) ? 34 :	\
			(n) & (1ULL << 33) ? 33 :	\
			(n) & (1ULL << 32) ? 32 :	\
			(n) & (1ULL << 31) ? 31 :	\
			(n) & (1ULL << 30) ? 30 :	\
			(n) & (1ULL << 29) ? 29 :	\
			(n) & (1ULL << 28) ? 28 :	\
			(n) & (1ULL << 27) ? 27 :	\
			(n) & (1ULL << 26) ? 26 :	\
			(n) & (1ULL << 25) ? 25 :	\
			(n) & (1ULL << 24) ? 24 :	\
			(n) & (1ULL << 23) ? 23 :	\
			(n) & (1ULL << 22) ? 22 :	\
			(n) & (1ULL << 21) ? 21 :	\
			(n) & (1ULL << 20) ? 20 :	\
			(n) & (1ULL << 19) ? 19 :	\
			(n) & (1ULL << 18) ? 18 :	\
			(n) & (1ULL << 17) ? 17 :	\
			(n) & (1ULL << 16) ? 16 :	\
			(n) & (1ULL << 15) ? 15 :	\
			(n) & (1ULL << 14) ? 14 :	\
			(n) & (1ULL << 13) ? 13 :	\
			(n) & (1ULL << 12) ? 12 :	\
			(n) & (1ULL << 11) ? 11 :	\
			(n) & (1ULL << 10) ? 10 :	\
			(n) & (1ULL <<  9) ?  9 :	\
			(n) & (1ULL <<  8) ?  8 :	\
			(n) & (1ULL <<  7) ?  7 :	\
			(n) & (1ULL <<  6) ?  6 :	\
			(n) & (1ULL <<  5) ?  5 :	\
			(n) & (1ULL <<  4) ?  4 :	\
			(n) & (1ULL <<  3) ?  3 :	\
			(n) & (1ULL <<  2) ?  2 :	\
			(n) & (1ULL <<  1) ?  1 :	\
			(n) & (1ULL <<  0) ?  0 :	\
			____ilog2_NaN()			\
					   ) :		\
		(sizeof(n) <= 4) ?			\
		__ilog2_u32(n) :			\
		__ilog2_u64(n)				\
	 )

	static noinline __attribute__((const))
	int old_get_order(unsigned long size)
	{
		int order;

		size = (size - 1) >> (PAGE_SHIFT - 1);
		order = -1;
		do {
			size >>= 1;
			order++;
		} while (size);
		return order;
	}

	static noinline __attribute__((const))
	int __get_order(unsigned long size)
	{
		int order;
		size--;
		size >>= PAGE_SHIFT;
	#if BITS_PER_LONG == 32
		order = fls(size);
	#else
		order = fls64(size);
	#endif
		return order;
	}

	#define get_order(n)						\
	(								\
		__builtin_constant_p(n) ? (				\
			(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :	\
			((n < (1UL << PAGE_SHIFT)) ? 0 :		\
			 ilog2((n) - 1) - PAGE_SHIFT + 1)		\
		) :							\
		__get_order(n)						\
	)

	#define order(N) \
		{ (1UL << N) - 1,	get_order((1UL << N) - 1)	},	\
		{ (1UL << N),		get_order((1UL << N))		},	\
		{ (1UL << N) + 1,	get_order((1UL << N) + 1)	}

	struct order {
		unsigned long n, order;
	};

	static const struct order order_table[] = {
		order(0),
		order(1),
		order(2),
		order(3),
		order(4),
		order(5),
		order(6),
		order(7),
		order(8),
		order(9),
		order(10),
		order(11),
		order(12),
		order(13),
		order(14),
		order(15),
		order(16),
		order(17),
		order(18),
		order(19),
		order(20),
		order(21),
		order(22),
		order(23),
		order(24),
		order(25),
		order(26),
		order(27),
		order(28),
		order(29),
		order(30),
		order(31),
	#if BITS_PER_LONG == 64
		order(32),
		order(33),
		order(34),
		order(35),
	#endif
		{ 0x2929 }
	};

	void check(int loop, unsigned long n)
	{
		unsigned long old, new;

		printf("[%2d]: %09lx | ", loop, n);

		old = old_get_order(n);
		new = get_order(n);

		printf("%3ld, %3ld\n", old, new);
		if (n != 0 && old != new)
			abort();
	}

	int main(int argc, char **argv)
	{
		const struct order *p;
		unsigned long n;
		int loop;

		for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
			n = 1UL << loop;
			check(loop, n - 1);
			check(loop, n);
			check(loop, n + 1);
		}

		for (p = order_table; p->n != 0x2929; p++) {
			unsigned long old, new;

			old = old_get_order(p->n);
			new = p->order;
			printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
			if (p->n != 0 && old != new)
				abort();
		}

		return 0;
	}

Disassembling the x86_64 version of the above code shows:

	0000000000400510 <old_get_order>:
	  400510:       48 83 ef 01             sub    $0x1,%rdi
	  400514:       b8 ff ff ff ff          mov    $0xffffffff,%eax
	  400519:       48 c1 ef 0b             shr    $0xb,%rdi
	  40051d:       0f 1f 00                nopl   (%rax)
	  400520:       83 c0 01                add    $0x1,%eax
	  400523:       48 d1 ef                shr    %rdi
	  400526:       75 f8                   jne    400520 <old_get_order+0x10>
	  400528:       f3 c3                   repz retq
	  40052a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

	0000000000400530 <__get_order>:
	  400530:       48 83 ef 01             sub    $0x1,%rdi
	  400534:       48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
	  40053b:       48 c1 ef 0c             shr    $0xc,%rdi
	  40053f:       48 0f bd c7             bsr    %rdi,%rax
	  400543:       83 c0 01                add    $0x1,%eax
	  400546:       c3                      retq
	  400547:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
	  40054e:       00 00

As can be seen, the new __get_order() function is simpler than the
old_get_order() function.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---

 include/asm-generic/getorder.h |   40 ++++++++++++++++++++++++++++------------
 1 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 76e9687..e0fb4bf 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -4,6 +4,25 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/compiler.h>
+#include <linux/log2.h>
+
+/*
+ * Runtime evaluation of get_order()
+ */
+static inline __attribute_const__
+int __get_order(unsigned long size)
+{
+	int order;
+
+	size--;
+	size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+	order = fls(size);
+#else
+	order = fls64(size);
+#endif
+	return order;
+}
 
 /**
  * get_order - Determine the allocation order of a memory size
@@ -27,18 +46,15 @@
  * This function may be used to initialise variables with compile time
  * evaluations of constants.
  */
-static inline __attribute_const__ int get_order(unsigned long size)
-{
-	int order;
-
-	size = (size - 1) >> (PAGE_SHIFT - 1);
-	order = -1;
-	do {
-		size >>= 1;
-		order++;
-	} while (size);
-	return order;
-}
+#define get_order(n)						\
+(								\
+	__builtin_constant_p(n) ? (				\
+		(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :	\
+		((n < (1UL << PAGE_SHIFT)) ? 0 :		\
+		 ilog2((n) - 1) - PAGE_SHIFT + 1)		\
+	) :							\
+	__get_order(n)						\
+)
 
 #endif	/* __ASSEMBLY__ */
 


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case
  2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
  2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
@ 2012-02-20 23:19 ` tip-bot for David Howells
  1 sibling, 0 replies; 8+ messages in thread
From: tip-bot for David Howells @ 2012-02-20 23:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx

Commit-ID:  e0891a9816316b5e05fd5b0453ffe9fd6a56f489
Gitweb:     http://git.kernel.org/tip/e0891a9816316b5e05fd5b0453ffe9fd6a56f489
Author:     David Howells <dhowells@redhat.com>
AuthorDate: Mon, 20 Feb 2012 22:39:18 +0000
Committer:  H. Peter Anvin <hpa@zytor.com>
CommitDate: Mon, 20 Feb 2012 14:46:55 -0800

bitops: Adjust the comment on get_order() to describe the size==0 case

Adjust the comment on get_order() to note that the result of passing a size of
0 results in an undefined value.

Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120220223917.16199.9416.stgit@warthog.procyon.org.uk
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-generic/getorder.h |   23 ++++++++++++++++++++++-
 1 files changed, 22 insertions(+), 1 deletions(-)

diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 67e7245..76e9687 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -5,7 +5,28 @@
 
 #include <linux/compiler.h>
 
-/* Pure 2^n version of get_order */
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory.  This
+ * is on a logarithmic scale, where:
+ *
+ *	0 -> 2^0 * PAGE_SIZE and below
+ *	1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ *	2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ *	3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ *	4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ *	...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ *
+ * This function may be used to initialise variables with compile time
+ * evaluations of constants.
+ */
 static inline __attribute_const__ int get_order(unsigned long size)
 {
 	int order;

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [tip:x86/asm] bitops: Optimise get_order()
  2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
@ 2012-02-20 23:20   ` tip-bot for David Howells
  2012-02-29 20:29     ` Paul Gortmaker
  0 siblings, 1 reply; 8+ messages in thread
From: tip-bot for David Howells @ 2012-02-20 23:20 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx

Commit-ID:  d66acc39c7cee323733c8503b9de1821a56dff7e
Gitweb:     http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
Author:     David Howells <dhowells@redhat.com>
AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
Committer:  H. Peter Anvin <hpa@zytor.com>
CommitDate: Mon, 20 Feb 2012 14:47:02 -0800

bitops: Optimise get_order()

Optimise get_order() to use bit scanning instructions if such exist rather than
a loop.  Also, make it possible to use get_order() in static initialisations
too by building it on top of ilog2() in the constant parameter case.

This has been tested for i386 and x86_64 using the following userspace program,
and for FRV by making appropriate substitutions for fls() and fls64().  It will
abort if the case for get_order() deviates from the original except for the
order of 0, for which get_order() produces an undefined result.  This program
tests both dynamic and static parameters.

	#include <stdlib.h>
	#include <stdio.h>

	#ifdef __x86_64__
	#define BITS_PER_LONG 64
	#else
	#define BITS_PER_LONG 32
	#endif

	#define PAGE_SHIFT 12

	typedef unsigned long long __u64, u64;
	typedef unsigned int __u32, u32;
	#define noinline	__attribute__((noinline))

	static inline int fls(int x)
	{
		int bitpos = -1;

		asm("bsrl %1,%0"
		    : "+r" (bitpos)
		    : "rm" (x));
		return bitpos + 1;
	}

	static __always_inline int fls64(__u64 x)
	{
	#if BITS_PER_LONG == 64
		long bitpos = -1;

		asm("bsrq %1,%0"
		    : "+r" (bitpos)
		    : "rm" (x));
		return bitpos + 1;
	#else
		__u32 h = x >> 32, l = x;
		int bitpos = -1;

		asm("bsrl	%1,%0	\n"
		    "subl	%2,%0	\n"
		    "bsrl	%3,%0	\n"
		    : "+r" (bitpos)
		    : "rm" (l), "i"(32), "rm" (h));

		return bitpos + 33;
	#endif
	}

	static inline __attribute__((const))
	int __ilog2_u32(u32 n)
	{
		return fls(n) - 1;
	}

	static inline __attribute__((const))
	int __ilog2_u64(u64 n)
	{
		return fls64(n) - 1;
	}

	extern __attribute__((const, noreturn))
	int ____ilog2_NaN(void);

	#define ilog2(n)				\
	(						\
		__builtin_constant_p(n) ? (		\
			(n) < 1 ? ____ilog2_NaN() :	\
			(n) & (1ULL << 63) ? 63 :	\
			(n) & (1ULL << 62) ? 62 :	\
			(n) & (1ULL << 61) ? 61 :	\
			(n) & (1ULL << 60) ? 60 :	\
			(n) & (1ULL << 59) ? 59 :	\
			(n) & (1ULL << 58) ? 58 :	\
			(n) & (1ULL << 57) ? 57 :	\
			(n) & (1ULL << 56) ? 56 :	\
			(n) & (1ULL << 55) ? 55 :	\
			(n) & (1ULL << 54) ? 54 :	\
			(n) & (1ULL << 53) ? 53 :	\
			(n) & (1ULL << 52) ? 52 :	\
			(n) & (1ULL << 51) ? 51 :	\
			(n) & (1ULL << 50) ? 50 :	\
			(n) & (1ULL << 49) ? 49 :	\
			(n) & (1ULL << 48) ? 48 :	\
			(n) & (1ULL << 47) ? 47 :	\
			(n) & (1ULL << 46) ? 46 :	\
			(n) & (1ULL << 45) ? 45 :	\
			(n) & (1ULL << 44) ? 44 :	\
			(n) & (1ULL << 43) ? 43 :	\
			(n) & (1ULL << 42) ? 42 :	\
			(n) & (1ULL << 41) ? 41 :	\
			(n) & (1ULL << 40) ? 40 :	\
			(n) & (1ULL << 39) ? 39 :	\
			(n) & (1ULL << 38) ? 38 :	\
			(n) & (1ULL << 37) ? 37 :	\
			(n) & (1ULL << 36) ? 36 :	\
			(n) & (1ULL << 35) ? 35 :	\
			(n) & (1ULL << 34) ? 34 :	\
			(n) & (1ULL << 33) ? 33 :	\
			(n) & (1ULL << 32) ? 32 :	\
			(n) & (1ULL << 31) ? 31 :	\
			(n) & (1ULL << 30) ? 30 :	\
			(n) & (1ULL << 29) ? 29 :	\
			(n) & (1ULL << 28) ? 28 :	\
			(n) & (1ULL << 27) ? 27 :	\
			(n) & (1ULL << 26) ? 26 :	\
			(n) & (1ULL << 25) ? 25 :	\
			(n) & (1ULL << 24) ? 24 :	\
			(n) & (1ULL << 23) ? 23 :	\
			(n) & (1ULL << 22) ? 22 :	\
			(n) & (1ULL << 21) ? 21 :	\
			(n) & (1ULL << 20) ? 20 :	\
			(n) & (1ULL << 19) ? 19 :	\
			(n) & (1ULL << 18) ? 18 :	\
			(n) & (1ULL << 17) ? 17 :	\
			(n) & (1ULL << 16) ? 16 :	\
			(n) & (1ULL << 15) ? 15 :	\
			(n) & (1ULL << 14) ? 14 :	\
			(n) & (1ULL << 13) ? 13 :	\
			(n) & (1ULL << 12) ? 12 :	\
			(n) & (1ULL << 11) ? 11 :	\
			(n) & (1ULL << 10) ? 10 :	\
			(n) & (1ULL <<  9) ?  9 :	\
			(n) & (1ULL <<  8) ?  8 :	\
			(n) & (1ULL <<  7) ?  7 :	\
			(n) & (1ULL <<  6) ?  6 :	\
			(n) & (1ULL <<  5) ?  5 :	\
			(n) & (1ULL <<  4) ?  4 :	\
			(n) & (1ULL <<  3) ?  3 :	\
			(n) & (1ULL <<  2) ?  2 :	\
			(n) & (1ULL <<  1) ?  1 :	\
			(n) & (1ULL <<  0) ?  0 :	\
			____ilog2_NaN()			\
					   ) :		\
		(sizeof(n) <= 4) ?			\
		__ilog2_u32(n) :			\
		__ilog2_u64(n)				\
	 )

	static noinline __attribute__((const))
	int old_get_order(unsigned long size)
	{
		int order;

		size = (size - 1) >> (PAGE_SHIFT - 1);
		order = -1;
		do {
			size >>= 1;
			order++;
		} while (size);
		return order;
	}

	static noinline __attribute__((const))
	int __get_order(unsigned long size)
	{
		int order;
		size--;
		size >>= PAGE_SHIFT;
	#if BITS_PER_LONG == 32
		order = fls(size);
	#else
		order = fls64(size);
	#endif
		return order;
	}

	#define get_order(n)						\
	(								\
		__builtin_constant_p(n) ? (				\
			(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :	\
			((n < (1UL << PAGE_SHIFT)) ? 0 :		\
			 ilog2((n) - 1) - PAGE_SHIFT + 1)		\
		) :							\
		__get_order(n)						\
	)

	#define order(N) \
		{ (1UL << N) - 1,	get_order((1UL << N) - 1)	},	\
		{ (1UL << N),		get_order((1UL << N))		},	\
		{ (1UL << N) + 1,	get_order((1UL << N) + 1)	}

	struct order {
		unsigned long n, order;
	};

	static const struct order order_table[] = {
		order(0),
		order(1),
		order(2),
		order(3),
		order(4),
		order(5),
		order(6),
		order(7),
		order(8),
		order(9),
		order(10),
		order(11),
		order(12),
		order(13),
		order(14),
		order(15),
		order(16),
		order(17),
		order(18),
		order(19),
		order(20),
		order(21),
		order(22),
		order(23),
		order(24),
		order(25),
		order(26),
		order(27),
		order(28),
		order(29),
		order(30),
		order(31),
	#if BITS_PER_LONG == 64
		order(32),
		order(33),
		order(34),
		order(35),
	#endif
		{ 0x2929 }
	};

	void check(int loop, unsigned long n)
	{
		unsigned long old, new;

		printf("[%2d]: %09lx | ", loop, n);

		old = old_get_order(n);
		new = get_order(n);

		printf("%3ld, %3ld\n", old, new);
		if (n != 0 && old != new)
			abort();
	}

	int main(int argc, char **argv)
	{
		const struct order *p;
		unsigned long n;
		int loop;

		for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
			n = 1UL << loop;
			check(loop, n - 1);
			check(loop, n);
			check(loop, n + 1);
		}

		for (p = order_table; p->n != 0x2929; p++) {
			unsigned long old, new;

			old = old_get_order(p->n);
			new = p->order;
			printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
			if (p->n != 0 && old != new)
				abort();
		}

		return 0;
	}

Disassembling the x86_64 version of the above code shows:

	0000000000400510 <old_get_order>:
	  400510:       48 83 ef 01             sub    $0x1,%rdi
	  400514:       b8 ff ff ff ff          mov    $0xffffffff,%eax
	  400519:       48 c1 ef 0b             shr    $0xb,%rdi
	  40051d:       0f 1f 00                nopl   (%rax)
	  400520:       83 c0 01                add    $0x1,%eax
	  400523:       48 d1 ef                shr    %rdi
	  400526:       75 f8                   jne    400520 <old_get_order+0x10>
	  400528:       f3 c3                   repz retq
	  40052a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

	0000000000400530 <__get_order>:
	  400530:       48 83 ef 01             sub    $0x1,%rdi
	  400534:       48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
	  40053b:       48 c1 ef 0c             shr    $0xc,%rdi
	  40053f:       48 0f bd c7             bsr    %rdi,%rax
	  400543:       83 c0 01                add    $0x1,%eax
	  400546:       c3                      retq
	  400547:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
	  40054e:       00 00

As can be seen, the new __get_order() function is simpler than the
old_get_order() function.

Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-generic/getorder.h |   40 ++++++++++++++++++++++++++++------------
 1 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 76e9687..e0fb4bf 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -4,6 +4,25 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/compiler.h>
+#include <linux/log2.h>
+
+/*
+ * Runtime evaluation of get_order()
+ */
+static inline __attribute_const__
+int __get_order(unsigned long size)
+{
+	int order;
+
+	size--;
+	size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+	order = fls(size);
+#else
+	order = fls64(size);
+#endif
+	return order;
+}
 
 /**
  * get_order - Determine the allocation order of a memory size
@@ -27,18 +46,15 @@
  * This function may be used to initialise variables with compile time
  * evaluations of constants.
  */
-static inline __attribute_const__ int get_order(unsigned long size)
-{
-	int order;
-
-	size = (size - 1) >> (PAGE_SHIFT - 1);
-	order = -1;
-	do {
-		size >>= 1;
-		order++;
-	} while (size);
-	return order;
-}
+#define get_order(n)						\
+(								\
+	__builtin_constant_p(n) ? (				\
+		(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :	\
+		((n < (1UL << PAGE_SHIFT)) ? 0 :		\
+		 ilog2((n) - 1) - PAGE_SHIFT + 1)		\
+	) :							\
+	__get_order(n)						\
+)
 
 #endif	/* __ASSEMBLY__ */
 

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [tip:x86/asm] bitops: Optimise get_order()
  2012-02-20 23:20   ` [tip:x86/asm] bitops: " tip-bot for David Howells
@ 2012-02-29 20:29     ` Paul Gortmaker
  2012-03-01  0:31       ` Stephen Rothwell
  2012-03-06 14:24       ` David Howells
  0 siblings, 2 replies; 8+ messages in thread
From: Paul Gortmaker @ 2012-02-29 20:29 UTC (permalink / raw)
  To: mingo, hpa, linux-kernel, arnd, dhowells, tglx
  Cc: linux-tip-commits, linux-next

On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
<dhowells@redhat.com> wrote:
> Commit-ID:  d66acc39c7cee323733c8503b9de1821a56dff7e
> Gitweb:     http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> Author:     David Howells <dhowells@redhat.com>
> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> Committer:  H. Peter Anvin <hpa@zytor.com>
> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>
> bitops: Optimise get_order()

This is causing build failures on non-x86 in linux next according to git bisect.

Here is one example:

http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/

Paul.
--

>
> Optimise get_order() to use bit scanning instructions if such exist rather than
> a loop.  Also, make it possible to use get_order() in static initialisations
> too by building it on top of ilog2() in the constant parameter case.
>
> This has been tested for i386 and x86_64 using the following userspace program,
> and for FRV by making appropriate substitutions for fls() and fls64().  It will
> abort if the case for get_order() deviates from the original except for the
> order of 0, for which get_order() produces an undefined result.  This program
> tests both dynamic and static parameters.
>
>        #include <stdlib.h>
>        #include <stdio.h>
>
>        #ifdef __x86_64__
>        #define BITS_PER_LONG 64
>        #else
>        #define BITS_PER_LONG 32
>        #endif
>
>        #define PAGE_SHIFT 12
>
>        typedef unsigned long long __u64, u64;
>        typedef unsigned int __u32, u32;
>        #define noinline        __attribute__((noinline))
>
>        static inline int fls(int x)
>        {
>                int bitpos = -1;
>
>                asm("bsrl %1,%0"
>                    : "+r" (bitpos)
>                    : "rm" (x));
>                return bitpos + 1;
>        }
>
>        static __always_inline int fls64(__u64 x)
>        {
>        #if BITS_PER_LONG == 64
>                long bitpos = -1;
>
>                asm("bsrq %1,%0"
>                    : "+r" (bitpos)
>                    : "rm" (x));
>                return bitpos + 1;
>        #else
>                __u32 h = x >> 32, l = x;
>                int bitpos = -1;
>
>                asm("bsrl       %1,%0   \n"
>                    "subl       %2,%0   \n"
>                    "bsrl       %3,%0   \n"
>                    : "+r" (bitpos)
>                    : "rm" (l), "i"(32), "rm" (h));
>
>                return bitpos + 33;
>        #endif
>        }
>
>        static inline __attribute__((const))
>        int __ilog2_u32(u32 n)
>        {
>                return fls(n) - 1;
>        }
>
>        static inline __attribute__((const))
>        int __ilog2_u64(u64 n)
>        {
>                return fls64(n) - 1;
>        }
>
>        extern __attribute__((const, noreturn))
>        int ____ilog2_NaN(void);
>
>        #define ilog2(n)                                \
>        (                                               \
>                __builtin_constant_p(n) ? (             \
>                        (n) < 1 ? ____ilog2_NaN() :     \
>                        (n) & (1ULL << 63) ? 63 :       \
>                        (n) & (1ULL << 62) ? 62 :       \
>                        (n) & (1ULL << 61) ? 61 :       \
>                        (n) & (1ULL << 60) ? 60 :       \
>                        (n) & (1ULL << 59) ? 59 :       \
>                        (n) & (1ULL << 58) ? 58 :       \
>                        (n) & (1ULL << 57) ? 57 :       \
>                        (n) & (1ULL << 56) ? 56 :       \
>                        (n) & (1ULL << 55) ? 55 :       \
>                        (n) & (1ULL << 54) ? 54 :       \
>                        (n) & (1ULL << 53) ? 53 :       \
>                        (n) & (1ULL << 52) ? 52 :       \
>                        (n) & (1ULL << 51) ? 51 :       \
>                        (n) & (1ULL << 50) ? 50 :       \
>                        (n) & (1ULL << 49) ? 49 :       \
>                        (n) & (1ULL << 48) ? 48 :       \
>                        (n) & (1ULL << 47) ? 47 :       \
>                        (n) & (1ULL << 46) ? 46 :       \
>                        (n) & (1ULL << 45) ? 45 :       \
>                        (n) & (1ULL << 44) ? 44 :       \
>                        (n) & (1ULL << 43) ? 43 :       \
>                        (n) & (1ULL << 42) ? 42 :       \
>                        (n) & (1ULL << 41) ? 41 :       \
>                        (n) & (1ULL << 40) ? 40 :       \
>                        (n) & (1ULL << 39) ? 39 :       \
>                        (n) & (1ULL << 38) ? 38 :       \
>                        (n) & (1ULL << 37) ? 37 :       \
>                        (n) & (1ULL << 36) ? 36 :       \
>                        (n) & (1ULL << 35) ? 35 :       \
>                        (n) & (1ULL << 34) ? 34 :       \
>                        (n) & (1ULL << 33) ? 33 :       \
>                        (n) & (1ULL << 32) ? 32 :       \
>                        (n) & (1ULL << 31) ? 31 :       \
>                        (n) & (1ULL << 30) ? 30 :       \
>                        (n) & (1ULL << 29) ? 29 :       \
>                        (n) & (1ULL << 28) ? 28 :       \
>                        (n) & (1ULL << 27) ? 27 :       \
>                        (n) & (1ULL << 26) ? 26 :       \
>                        (n) & (1ULL << 25) ? 25 :       \
>                        (n) & (1ULL << 24) ? 24 :       \
>                        (n) & (1ULL << 23) ? 23 :       \
>                        (n) & (1ULL << 22) ? 22 :       \
>                        (n) & (1ULL << 21) ? 21 :       \
>                        (n) & (1ULL << 20) ? 20 :       \
>                        (n) & (1ULL << 19) ? 19 :       \
>                        (n) & (1ULL << 18) ? 18 :       \
>                        (n) & (1ULL << 17) ? 17 :       \
>                        (n) & (1ULL << 16) ? 16 :       \
>                        (n) & (1ULL << 15) ? 15 :       \
>                        (n) & (1ULL << 14) ? 14 :       \
>                        (n) & (1ULL << 13) ? 13 :       \
>                        (n) & (1ULL << 12) ? 12 :       \
>                        (n) & (1ULL << 11) ? 11 :       \
>                        (n) & (1ULL << 10) ? 10 :       \
>                        (n) & (1ULL <<  9) ?  9 :       \
>                        (n) & (1ULL <<  8) ?  8 :       \
>                        (n) & (1ULL <<  7) ?  7 :       \
>                        (n) & (1ULL <<  6) ?  6 :       \
>                        (n) & (1ULL <<  5) ?  5 :       \
>                        (n) & (1ULL <<  4) ?  4 :       \
>                        (n) & (1ULL <<  3) ?  3 :       \
>                        (n) & (1ULL <<  2) ?  2 :       \
>                        (n) & (1ULL <<  1) ?  1 :       \
>                        (n) & (1ULL <<  0) ?  0 :       \
>                        ____ilog2_NaN()                 \
>                                           ) :          \
>                (sizeof(n) <= 4) ?                      \
>                __ilog2_u32(n) :                        \
>                __ilog2_u64(n)                          \
>         )
>
>        static noinline __attribute__((const))
>        int old_get_order(unsigned long size)
>        {
>                int order;
>
>                size = (size - 1) >> (PAGE_SHIFT - 1);
>                order = -1;
>                do {
>                        size >>= 1;
>                        order++;
>                } while (size);
>                return order;
>        }
>
>        static noinline __attribute__((const))
>        int __get_order(unsigned long size)
>        {
>                int order;
>                size--;
>                size >>= PAGE_SHIFT;
>        #if BITS_PER_LONG == 32
>                order = fls(size);
>        #else
>                order = fls64(size);
>        #endif
>                return order;
>        }
>
>        #define get_order(n)                                            \
>        (                                                               \
>                __builtin_constant_p(n) ? (                             \
>                        (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :       \
>                        ((n < (1UL << PAGE_SHIFT)) ? 0 :                \
>                         ilog2((n) - 1) - PAGE_SHIFT + 1)               \
>                ) :                                                     \
>                __get_order(n)                                          \
>        )
>
>        #define order(N) \
>                { (1UL << N) - 1,       get_order((1UL << N) - 1)       },      \
>                { (1UL << N),           get_order((1UL << N))           },      \
>                { (1UL << N) + 1,       get_order((1UL << N) + 1)       }
>
>        struct order {
>                unsigned long n, order;
>        };
>
>        static const struct order order_table[] = {
>                order(0),
>                order(1),
>                order(2),
>                order(3),
>                order(4),
>                order(5),
>                order(6),
>                order(7),
>                order(8),
>                order(9),
>                order(10),
>                order(11),
>                order(12),
>                order(13),
>                order(14),
>                order(15),
>                order(16),
>                order(17),
>                order(18),
>                order(19),
>                order(20),
>                order(21),
>                order(22),
>                order(23),
>                order(24),
>                order(25),
>                order(26),
>                order(27),
>                order(28),
>                order(29),
>                order(30),
>                order(31),
>        #if BITS_PER_LONG == 64
>                order(32),
>                order(33),
>                order(34),
>                order(35),
>        #endif
>                { 0x2929 }
>        };
>
>        void check(int loop, unsigned long n)
>        {
>                unsigned long old, new;
>
>                printf("[%2d]: %09lx | ", loop, n);
>
>                old = old_get_order(n);
>                new = get_order(n);
>
>                printf("%3ld, %3ld\n", old, new);
>                if (n != 0 && old != new)
>                        abort();
>        }
>
>        int main(int argc, char **argv)
>        {
>                const struct order *p;
>                unsigned long n;
>                int loop;
>
>                for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
>                        n = 1UL << loop;
>                        check(loop, n - 1);
>                        check(loop, n);
>                        check(loop, n + 1);
>                }
>
>                for (p = order_table; p->n != 0x2929; p++) {
>                        unsigned long old, new;
>
>                        old = old_get_order(p->n);
>                        new = p->order;
>                        printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
>                        if (p->n != 0 && old != new)
>                                abort();
>                }
>
>                return 0;
>        }
>
> Disassembling the x86_64 version of the above code shows:
>
>        0000000000400510 <old_get_order>:
>          400510:       48 83 ef 01             sub    $0x1,%rdi
>          400514:       b8 ff ff ff ff          mov    $0xffffffff,%eax
>          400519:       48 c1 ef 0b             shr    $0xb,%rdi
>          40051d:       0f 1f 00                nopl   (%rax)
>          400520:       83 c0 01                add    $0x1,%eax
>          400523:       48 d1 ef                shr    %rdi
>          400526:       75 f8                   jne    400520 <old_get_order+0x10>
>          400528:       f3 c3                   repz retq
>          40052a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
>
>        0000000000400530 <__get_order>:
>          400530:       48 83 ef 01             sub    $0x1,%rdi
>          400534:       48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
>          40053b:       48 c1 ef 0c             shr    $0xc,%rdi
>          40053f:       48 0f bd c7             bsr    %rdi,%rax
>          400543:       83 c0 01                add    $0x1,%eax
>          400546:       c3                      retq
>          400547:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
>          40054e:       00 00
>
> As can be seen, the new __get_order() function is simpler than the
> old_get_order() function.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
> ---
>  include/asm-generic/getorder.h |   40 ++++++++++++++++++++++++++++------------
>  1 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
> index 76e9687..e0fb4bf 100644
> --- a/include/asm-generic/getorder.h
> +++ b/include/asm-generic/getorder.h
> @@ -4,6 +4,25 @@
>  #ifndef __ASSEMBLY__
>
>  #include <linux/compiler.h>
> +#include <linux/log2.h>
> +
> +/*
> + * Runtime evaluation of get_order()
> + */
> +static inline __attribute_const__
> +int __get_order(unsigned long size)
> +{
> +       int order;
> +
> +       size--;
> +       size >>= PAGE_SHIFT;
> +#if BITS_PER_LONG == 32
> +       order = fls(size);
> +#else
> +       order = fls64(size);
> +#endif
> +       return order;
> +}
>
>  /**
>  * get_order - Determine the allocation order of a memory size
> @@ -27,18 +46,15 @@
>  * This function may be used to initialise variables with compile time
>  * evaluations of constants.
>  */
> -static inline __attribute_const__ int get_order(unsigned long size)
> -{
> -       int order;
> -
> -       size = (size - 1) >> (PAGE_SHIFT - 1);
> -       order = -1;
> -       do {
> -               size >>= 1;
> -               order++;
> -       } while (size);
> -       return order;
> -}
> +#define get_order(n)                                           \
> +(                                                              \
> +       __builtin_constant_p(n) ? (                             \
> +               (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :       \
> +               ((n < (1UL << PAGE_SHIFT)) ? 0 :                \
> +                ilog2((n) - 1) - PAGE_SHIFT + 1)               \
> +       ) :                                                     \
> +       __get_order(n)                                          \
> +)
>
>  #endif /* __ASSEMBLY__ */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [tip:x86/asm] bitops: Optimise get_order()
  2012-02-29 20:29     ` Paul Gortmaker
@ 2012-03-01  0:31       ` Stephen Rothwell
  2012-03-01 21:02         ` H. Peter Anvin
  2012-03-06 14:24       ` David Howells
  1 sibling, 1 reply; 8+ messages in thread
From: Stephen Rothwell @ 2012-03-01  0:31 UTC (permalink / raw)
  To: Paul Gortmaker
  Cc: mingo, hpa, linux-kernel, arnd, dhowells, tglx, linux-tip-commits,
	linux-next

[-- Attachment #1: Type: text/plain, Size: 1199 bytes --]

On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
>
> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
> <dhowells@redhat.com> wrote:
> > Commit-ID:  d66acc39c7cee323733c8503b9de1821a56dff7e
> > Gitweb:     http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> > Author:     David Howells <dhowells@redhat.com>
> > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> > Committer:  H. Peter Anvin <hpa@zytor.com>
> > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
> >
> > bitops: Optimise get_order()
> 
> This is causing build failures on non-x86 in linux next according to git bisect.

Presumably it needs to include linux/bitops.h (and see below).

> > +static inline __attribute_const__
> > +int __get_order(unsigned long size)
> > +{
> > +       int order;
> > +
> > +       size--;
> > +       size >>= PAGE_SHIFT;
> > +#if BITS_PER_LONG == 32
> > +       order = fls(size);
> > +#else
> > +       order = fls64(size);
> > +#endif

linux/bitops.h has fls_long() that does this size test and calls the right thing.

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [tip:x86/asm] bitops: Optimise get_order()
  2012-03-01  0:31       ` Stephen Rothwell
@ 2012-03-01 21:02         ` H. Peter Anvin
  0 siblings, 0 replies; 8+ messages in thread
From: H. Peter Anvin @ 2012-03-01 21:02 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: Paul Gortmaker, mingo, linux-kernel, arnd, dhowells, tglx,
	linux-tip-commits, linux-next

David, could you make a fix for this?

	-hpa

On 02/29/2012 04:31 PM, Stephen Rothwell wrote:
> On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker<paul.gortmaker@windriver.com>  wrote:
>>
>> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
>> <dhowells@redhat.com>  wrote:
>>> Commit-ID:  d66acc39c7cee323733c8503b9de1821a56dff7e
>>> Gitweb:     http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
>>> Author:     David Howells<dhowells@redhat.com>
>>> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
>>> Committer:  H. Peter Anvin<hpa@zytor.com>
>>> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>>>
>>> bitops: Optimise get_order()
>>
>> This is causing build failures on non-x86 in linux next according to git bisect.
>
> Presumably it needs to include linux/bitops.h (and see below).
>
>>> +static inline __attribute_const__
>>> +int __get_order(unsigned long size)
>>> +{
>>> +       int order;
>>> +
>>> +       size--;
>>> +       size>>= PAGE_SHIFT;
>>> +#if BITS_PER_LONG == 32
>>> +       order = fls(size);
>>> +#else
>>> +       order = fls64(size);
>>> +#endif
>
> linux/bitops.h has fls_long() that does this size test and calls the right thing.
>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [tip:x86/asm] bitops: Optimise get_order()
  2012-02-29 20:29     ` Paul Gortmaker
  2012-03-01  0:31       ` Stephen Rothwell
@ 2012-03-06 14:24       ` David Howells
  1 sibling, 0 replies; 8+ messages in thread
From: David Howells @ 2012-03-06 14:24 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: dhowells, Paul Gortmaker, mingo, hpa, linux-kernel, arnd, tglx,
	linux-tip-commits, linux-next

Stephen Rothwell <sfr@canb.auug.org.au> wrote:

> Presumably it needs to include linux/bitops.h (and see below).

That won't help.  It already includes that via linux/log2.h.

The error chain looks suspiciously like a circular dependency has occurred.

David

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-03-06 14:25 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
2012-02-20 23:20   ` [tip:x86/asm] bitops: " tip-bot for David Howells
2012-02-29 20:29     ` Paul Gortmaker
2012-03-01  0:31       ` Stephen Rothwell
2012-03-01 21:02         ` H. Peter Anvin
2012-03-06 14:24       ` David Howells
2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).