All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case
@ 2012-02-20 22:39 David Howells
  2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
  2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
  0 siblings, 2 replies; 9+ messages in thread
From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann

Adjust the comment on get_order() to note that the result of passing a size of
0 results in an undefined value.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---

 include/asm-generic/getorder.h |   23 ++++++++++++++++++++++-
 1 files changed, 22 insertions(+), 1 deletions(-)

diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 67e7245..76e9687 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -5,7 +5,28 @@
 
 #include <linux/compiler.h>
 
-/* Pure 2^n version of get_order */
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory.  This
+ * is on a logarithmic scale, where:
+ *
+ *	0 -> 2^0 * PAGE_SIZE and below
+ *	1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ *	2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ *	3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ *	4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ *	...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ *
+ * This function may be used to initialise variables with compile time
+ * evaluations of constants.
+ */
 static inline __attribute_const__ int get_order(unsigned long size)
 {
 	int order;

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] Optimise get_order()
  2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
@ 2012-02-20 22:39 ` David Howells
  2012-02-20 23:20   ` [tip:x86/asm] bitops: " tip-bot for David Howells
  2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
  1 sibling, 1 reply; 9+ messages in thread
From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw)
  To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann

Optimise get_order() to use bit scanning instructions if such exist rather than
a loop.  Also, make it possible to use get_order() in static initialisations
too by building it on top of ilog2() in the constant parameter case.

This has been tested for i386 and x86_64 using the following userspace program,
and for FRV by making appropriate substitutions for fls() and fls64().  It will
abort if the case for get_order() deviates from the original except for the
order of 0, for which get_order() produces an undefined result.  This program
tests both dynamic and static parameters.

	#include <stdlib.h>
	#include <stdio.h>

	#ifdef __x86_64__
	#define BITS_PER_LONG 64
	#else
	#define BITS_PER_LONG 32
	#endif

	#define PAGE_SHIFT 12

	typedef unsigned long long __u64, u64;
	typedef unsigned int __u32, u32;
	#define noinline	__attribute__((noinline))

	static inline int fls(int x)
	{
		int bitpos = -1;

		asm("bsrl %1,%0"
		    : "+r" (bitpos)
		    : "rm" (x));
		return bitpos + 1;
	}

	static __always_inline int fls64(__u64 x)
	{
	#if BITS_PER_LONG == 64
		long bitpos = -1;

		asm("bsrq %1,%0"
		    : "+r" (bitpos)
		    : "rm" (x));
		return bitpos + 1;
	#else
		__u32 h = x >> 32, l = x;
		int bitpos = -1;

		asm("bsrl	%1,%0	\n"
		    "subl	%2,%0	\n"
		    "bsrl	%3,%0	\n"
		    : "+r" (bitpos)
		    : "rm" (l), "i"(32), "rm" (h));

		return bitpos + 33;
	#endif
	}

	static inline __attribute__((const))
	int __ilog2_u32(u32 n)
	{
		return fls(n) - 1;
	}

	static inline __attribute__((const))
	int __ilog2_u64(u64 n)
	{
		return fls64(n) - 1;
	}

	extern __attribute__((const, noreturn))
	int ____ilog2_NaN(void);

	#define ilog2(n)				\
	(						\
		__builtin_constant_p(n) ? (		\
			(n) < 1 ? ____ilog2_NaN() :	\
			(n) & (1ULL << 63) ? 63 :	\
			(n) & (1ULL << 62) ? 62 :	\
			(n) & (1ULL << 61) ? 61 :	\
			(n) & (1ULL << 60) ? 60 :	\
			(n) & (1ULL << 59) ? 59 :	\
			(n) & (1ULL << 58) ? 58 :	\
			(n) & (1ULL << 57) ? 57 :	\
			(n) & (1ULL << 56) ? 56 :	\
			(n) & (1ULL << 55) ? 55 :	\
			(n) & (1ULL << 54) ? 54 :	\
			(n) & (1ULL << 53) ? 53 :	\
			(n) & (1ULL << 52) ? 52 :	\
			(n) & (1ULL << 51) ? 51 :	\
			(n) & (1ULL << 50) ? 50 :	\
			(n) & (1ULL << 49) ? 49 :	\
			(n) & (1ULL << 48) ? 48 :	\
			(n) & (1ULL << 47) ? 47 :	\
			(n) & (1ULL << 46) ? 46 :	\
			(n) & (1ULL << 45) ? 45 :	\
			(n) & (1ULL << 44) ? 44 :	\
			(n) & (1ULL << 43) ? 43 :	\
			(n) & (1ULL << 42) ? 42 :	\
			(n) & (1ULL << 41) ? 41 :	\
			(n) & (1ULL << 40) ? 40 :	\
			(n) & (1ULL << 39) ? 39 :	\
			(n) & (1ULL << 38) ? 38 :	\
			(n) & (1ULL << 37) ? 37 :	\
			(n) & (1ULL << 36) ? 36 :	\
			(n) & (1ULL << 35) ? 35 :	\
			(n) & (1ULL << 34) ? 34 :	\
			(n) & (1ULL << 33) ? 33 :	\
			(n) & (1ULL << 32) ? 32 :	\
			(n) & (1ULL << 31) ? 31 :	\
			(n) & (1ULL << 30) ? 30 :	\
			(n) & (1ULL << 29) ? 29 :	\
			(n) & (1ULL << 28) ? 28 :	\
			(n) & (1ULL << 27) ? 27 :	\
			(n) & (1ULL << 26) ? 26 :	\
			(n) & (1ULL << 25) ? 25 :	\
			(n) & (1ULL << 24) ? 24 :	\
			(n) & (1ULL << 23) ? 23 :	\
			(n) & (1ULL << 22) ? 22 :	\
			(n) & (1ULL << 21) ? 21 :	\
			(n) & (1ULL << 20) ? 20 :	\
			(n) & (1ULL << 19) ? 19 :	\
			(n) & (1ULL << 18) ? 18 :	\
			(n) & (1ULL << 17) ? 17 :	\
			(n) & (1ULL << 16) ? 16 :	\
			(n) & (1ULL << 15) ? 15 :	\
			(n) & (1ULL << 14) ? 14 :	\
			(n) & (1ULL << 13) ? 13 :	\
			(n) & (1ULL << 12) ? 12 :	\
			(n) & (1ULL << 11) ? 11 :	\
			(n) & (1ULL << 10) ? 10 :	\
			(n) & (1ULL <<  9) ?  9 :	\
			(n) & (1ULL <<  8) ?  8 :	\
			(n) & (1ULL <<  7) ?  7 :	\
			(n) & (1ULL <<  6) ?  6 :	\
			(n) & (1ULL <<  5) ?  5 :	\
			(n) & (1ULL <<  4) ?  4 :	\
			(n) & (1ULL <<  3) ?  3 :	\
			(n) & (1ULL <<  2) ?  2 :	\
			(n) & (1ULL <<  1) ?  1 :	\
			(n) & (1ULL <<  0) ?  0 :	\
			____ilog2_NaN()			\
					   ) :		\
		(sizeof(n) <= 4) ?			\
		__ilog2_u32(n) :			\
		__ilog2_u64(n)				\
	 )

	static noinline __attribute__((const))
	int old_get_order(unsigned long size)
	{
		int order;

		size = (size - 1) >> (PAGE_SHIFT - 1);
		order = -1;
		do {
			size >>= 1;
			order++;
		} while (size);
		return order;
	}

	static noinline __attribute__((const))
	int __get_order(unsigned long size)
	{
		int order;
		size--;
		size >>= PAGE_SHIFT;
	#if BITS_PER_LONG == 32
		order = fls(size);
	#else
		order = fls64(size);
	#endif
		return order;
	}

	#define get_order(n)						\
	(								\
		__builtin_constant_p(n) ? (				\
			(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :	\
			((n < (1UL << PAGE_SHIFT)) ? 0 :		\
			 ilog2((n) - 1) - PAGE_SHIFT + 1)		\
		) :							\
		__get_order(n)						\
	)

	#define order(N) \
		{ (1UL << N) - 1,	get_order((1UL << N) - 1)	},	\
		{ (1UL << N),		get_order((1UL << N))		},	\
		{ (1UL << N) + 1,	get_order((1UL << N) + 1)	}

	struct order {
		unsigned long n, order;
	};

	static const struct order order_table[] = {
		order(0),
		order(1),
		order(2),
		order(3),
		order(4),
		order(5),
		order(6),
		order(7),
		order(8),
		order(9),
		order(10),
		order(11),
		order(12),
		order(13),
		order(14),
		order(15),
		order(16),
		order(17),
		order(18),
		order(19),
		order(20),
		order(21),
		order(22),
		order(23),
		order(24),
		order(25),
		order(26),
		order(27),
		order(28),
		order(29),
		order(30),
		order(31),
	#if BITS_PER_LONG == 64
		order(32),
		order(33),
		order(34),
		order(35),
	#endif
		{ 0x2929 }
	};

	void check(int loop, unsigned long n)
	{
		unsigned long old, new;

		printf("[%2d]: %09lx | ", loop, n);

		old = old_get_order(n);
		new = get_order(n);

		printf("%3ld, %3ld\n", old, new);
		if (n != 0 && old != new)
			abort();
	}

	int main(int argc, char **argv)
	{
		const struct order *p;
		unsigned long n;
		int loop;

		for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
			n = 1UL << loop;
			check(loop, n - 1);
			check(loop, n);
			check(loop, n + 1);
		}

		for (p = order_table; p->n != 0x2929; p++) {
			unsigned long old, new;

			old = old_get_order(p->n);
			new = p->order;
			printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
			if (p->n != 0 && old != new)
				abort();
		}

		return 0;
	}

Disassembling the x86_64 version of the above code shows:

	0000000000400510 <old_get_order>:
	  400510:       48 83 ef 01             sub    $0x1,%rdi
	  400514:       b8 ff ff ff ff          mov    $0xffffffff,%eax
	  400519:       48 c1 ef 0b             shr    $0xb,%rdi
	  40051d:       0f 1f 00                nopl   (%rax)
	  400520:       83 c0 01                add    $0x1,%eax
	  400523:       48 d1 ef                shr    %rdi
	  400526:       75 f8                   jne    400520 <old_get_order+0x10>
	  400528:       f3 c3                   repz retq
	  40052a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

	0000000000400530 <__get_order>:
	  400530:       48 83 ef 01             sub    $0x1,%rdi
	  400534:       48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
	  40053b:       48 c1 ef 0c             shr    $0xc,%rdi
	  40053f:       48 0f bd c7             bsr    %rdi,%rax
	  400543:       83 c0 01                add    $0x1,%eax
	  400546:       c3                      retq
	  400547:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
	  40054e:       00 00

As can be seen, the new __get_order() function is simpler than the
old_get_order() function.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---

 include/asm-generic/getorder.h |   40 ++++++++++++++++++++++++++++------------
 1 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 76e9687..e0fb4bf 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -4,6 +4,25 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/compiler.h>
+#include <linux/log2.h>
+
+/*
+ * Runtime evaluation of get_order()
+ */
+static inline __attribute_const__
+int __get_order(unsigned long size)
+{
+	int order;
+
+	size--;
+	size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+	order = fls(size);
+#else
+	order = fls64(size);
+#endif
+	return order;
+}
 
 /**
  * get_order - Determine the allocation order of a memory size
@@ -27,18 +46,15 @@
  * This function may be used to initialise variables with compile time
  * evaluations of constants.
  */
-static inline __attribute_const__ int get_order(unsigned long size)
-{
-	int order;
-
-	size = (size - 1) >> (PAGE_SHIFT - 1);
-	order = -1;
-	do {
-		size >>= 1;
-		order++;
-	} while (size);
-	return order;
-}
+#define get_order(n)						\
+(								\
+	__builtin_constant_p(n) ? (				\
+		(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :	\
+		((n < (1UL << PAGE_SHIFT)) ? 0 :		\
+		 ilog2((n) - 1) - PAGE_SHIFT + 1)		\
+	) :							\
+	__get_order(n)						\
+)
 
 #endif	/* __ASSEMBLY__ */
 

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case
  2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
  2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
@ 2012-02-20 23:19 ` tip-bot for David Howells
  1 sibling, 0 replies; 9+ messages in thread
From: tip-bot for David Howells @ 2012-02-20 23:19 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx

Commit-ID:  e0891a9816316b5e05fd5b0453ffe9fd6a56f489
Gitweb:     http://git.kernel.org/tip/e0891a9816316b5e05fd5b0453ffe9fd6a56f489
Author:     David Howells <dhowells@redhat.com>
AuthorDate: Mon, 20 Feb 2012 22:39:18 +0000
Committer:  H. Peter Anvin <hpa@zytor.com>
CommitDate: Mon, 20 Feb 2012 14:46:55 -0800

bitops: Adjust the comment on get_order() to describe the size==0 case

Adjust the comment on get_order() to note that the result of passing a size of
0 results in an undefined value.

Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120220223917.16199.9416.stgit@warthog.procyon.org.uk
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-generic/getorder.h |   23 ++++++++++++++++++++++-
 1 files changed, 22 insertions(+), 1 deletions(-)

diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 67e7245..76e9687 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -5,7 +5,28 @@
 
 #include <linux/compiler.h>
 
-/* Pure 2^n version of get_order */
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory.  This
+ * is on a logarithmic scale, where:
+ *
+ *	0 -> 2^0 * PAGE_SIZE and below
+ *	1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ *	2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ *	3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ *	4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ *	...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ *
+ * This function may be used to initialise variables with compile time
+ * evaluations of constants.
+ */
 static inline __attribute_const__ int get_order(unsigned long size)
 {
 	int order;

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [tip:x86/asm] bitops: Optimise get_order()
  2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
@ 2012-02-20 23:20   ` tip-bot for David Howells
  2012-02-29 20:29       ` Paul Gortmaker
  0 siblings, 1 reply; 9+ messages in thread
From: tip-bot for David Howells @ 2012-02-20 23:20 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx

Commit-ID:  d66acc39c7cee323733c8503b9de1821a56dff7e
Gitweb:     http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
Author:     David Howells <dhowells@redhat.com>
AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
Committer:  H. Peter Anvin <hpa@zytor.com>
CommitDate: Mon, 20 Feb 2012 14:47:02 -0800

bitops: Optimise get_order()

Optimise get_order() to use bit scanning instructions if such exist rather than
a loop.  Also, make it possible to use get_order() in static initialisations
too by building it on top of ilog2() in the constant parameter case.

This has been tested for i386 and x86_64 using the following userspace program,
and for FRV by making appropriate substitutions for fls() and fls64().  It will
abort if the case for get_order() deviates from the original except for the
order of 0, for which get_order() produces an undefined result.  This program
tests both dynamic and static parameters.

	#include <stdlib.h>
	#include <stdio.h>

	#ifdef __x86_64__
	#define BITS_PER_LONG 64
	#else
	#define BITS_PER_LONG 32
	#endif

	#define PAGE_SHIFT 12

	typedef unsigned long long __u64, u64;
	typedef unsigned int __u32, u32;
	#define noinline	__attribute__((noinline))

	static inline int fls(int x)
	{
		int bitpos = -1;

		asm("bsrl %1,%0"
		    : "+r" (bitpos)
		    : "rm" (x));
		return bitpos + 1;
	}

	static __always_inline int fls64(__u64 x)
	{
	#if BITS_PER_LONG == 64
		long bitpos = -1;

		asm("bsrq %1,%0"
		    : "+r" (bitpos)
		    : "rm" (x));
		return bitpos + 1;
	#else
		__u32 h = x >> 32, l = x;
		int bitpos = -1;

		asm("bsrl	%1,%0	\n"
		    "subl	%2,%0	\n"
		    "bsrl	%3,%0	\n"
		    : "+r" (bitpos)
		    : "rm" (l), "i"(32), "rm" (h));

		return bitpos + 33;
	#endif
	}

	static inline __attribute__((const))
	int __ilog2_u32(u32 n)
	{
		return fls(n) - 1;
	}

	static inline __attribute__((const))
	int __ilog2_u64(u64 n)
	{
		return fls64(n) - 1;
	}

	extern __attribute__((const, noreturn))
	int ____ilog2_NaN(void);

	#define ilog2(n)				\
	(						\
		__builtin_constant_p(n) ? (		\
			(n) < 1 ? ____ilog2_NaN() :	\
			(n) & (1ULL << 63) ? 63 :	\
			(n) & (1ULL << 62) ? 62 :	\
			(n) & (1ULL << 61) ? 61 :	\
			(n) & (1ULL << 60) ? 60 :	\
			(n) & (1ULL << 59) ? 59 :	\
			(n) & (1ULL << 58) ? 58 :	\
			(n) & (1ULL << 57) ? 57 :	\
			(n) & (1ULL << 56) ? 56 :	\
			(n) & (1ULL << 55) ? 55 :	\
			(n) & (1ULL << 54) ? 54 :	\
			(n) & (1ULL << 53) ? 53 :	\
			(n) & (1ULL << 52) ? 52 :	\
			(n) & (1ULL << 51) ? 51 :	\
			(n) & (1ULL << 50) ? 50 :	\
			(n) & (1ULL << 49) ? 49 :	\
			(n) & (1ULL << 48) ? 48 :	\
			(n) & (1ULL << 47) ? 47 :	\
			(n) & (1ULL << 46) ? 46 :	\
			(n) & (1ULL << 45) ? 45 :	\
			(n) & (1ULL << 44) ? 44 :	\
			(n) & (1ULL << 43) ? 43 :	\
			(n) & (1ULL << 42) ? 42 :	\
			(n) & (1ULL << 41) ? 41 :	\
			(n) & (1ULL << 40) ? 40 :	\
			(n) & (1ULL << 39) ? 39 :	\
			(n) & (1ULL << 38) ? 38 :	\
			(n) & (1ULL << 37) ? 37 :	\
			(n) & (1ULL << 36) ? 36 :	\
			(n) & (1ULL << 35) ? 35 :	\
			(n) & (1ULL << 34) ? 34 :	\
			(n) & (1ULL << 33) ? 33 :	\
			(n) & (1ULL << 32) ? 32 :	\
			(n) & (1ULL << 31) ? 31 :	\
			(n) & (1ULL << 30) ? 30 :	\
			(n) & (1ULL << 29) ? 29 :	\
			(n) & (1ULL << 28) ? 28 :	\
			(n) & (1ULL << 27) ? 27 :	\
			(n) & (1ULL << 26) ? 26 :	\
			(n) & (1ULL << 25) ? 25 :	\
			(n) & (1ULL << 24) ? 24 :	\
			(n) & (1ULL << 23) ? 23 :	\
			(n) & (1ULL << 22) ? 22 :	\
			(n) & (1ULL << 21) ? 21 :	\
			(n) & (1ULL << 20) ? 20 :	\
			(n) & (1ULL << 19) ? 19 :	\
			(n) & (1ULL << 18) ? 18 :	\
			(n) & (1ULL << 17) ? 17 :	\
			(n) & (1ULL << 16) ? 16 :	\
			(n) & (1ULL << 15) ? 15 :	\
			(n) & (1ULL << 14) ? 14 :	\
			(n) & (1ULL << 13) ? 13 :	\
			(n) & (1ULL << 12) ? 12 :	\
			(n) & (1ULL << 11) ? 11 :	\
			(n) & (1ULL << 10) ? 10 :	\
			(n) & (1ULL <<  9) ?  9 :	\
			(n) & (1ULL <<  8) ?  8 :	\
			(n) & (1ULL <<  7) ?  7 :	\
			(n) & (1ULL <<  6) ?  6 :	\
			(n) & (1ULL <<  5) ?  5 :	\
			(n) & (1ULL <<  4) ?  4 :	\
			(n) & (1ULL <<  3) ?  3 :	\
			(n) & (1ULL <<  2) ?  2 :	\
			(n) & (1ULL <<  1) ?  1 :	\
			(n) & (1ULL <<  0) ?  0 :	\
			____ilog2_NaN()			\
					   ) :		\
		(sizeof(n) <= 4) ?			\
		__ilog2_u32(n) :			\
		__ilog2_u64(n)				\
	 )

	static noinline __attribute__((const))
	int old_get_order(unsigned long size)
	{
		int order;

		size = (size - 1) >> (PAGE_SHIFT - 1);
		order = -1;
		do {
			size >>= 1;
			order++;
		} while (size);
		return order;
	}

	static noinline __attribute__((const))
	int __get_order(unsigned long size)
	{
		int order;
		size--;
		size >>= PAGE_SHIFT;
	#if BITS_PER_LONG == 32
		order = fls(size);
	#else
		order = fls64(size);
	#endif
		return order;
	}

	#define get_order(n)						\
	(								\
		__builtin_constant_p(n) ? (				\
			(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :	\
			((n < (1UL << PAGE_SHIFT)) ? 0 :		\
			 ilog2((n) - 1) - PAGE_SHIFT + 1)		\
		) :							\
		__get_order(n)						\
	)

	#define order(N) \
		{ (1UL << N) - 1,	get_order((1UL << N) - 1)	},	\
		{ (1UL << N),		get_order((1UL << N))		},	\
		{ (1UL << N) + 1,	get_order((1UL << N) + 1)	}

	struct order {
		unsigned long n, order;
	};

	static const struct order order_table[] = {
		order(0),
		order(1),
		order(2),
		order(3),
		order(4),
		order(5),
		order(6),
		order(7),
		order(8),
		order(9),
		order(10),
		order(11),
		order(12),
		order(13),
		order(14),
		order(15),
		order(16),
		order(17),
		order(18),
		order(19),
		order(20),
		order(21),
		order(22),
		order(23),
		order(24),
		order(25),
		order(26),
		order(27),
		order(28),
		order(29),
		order(30),
		order(31),
	#if BITS_PER_LONG == 64
		order(32),
		order(33),
		order(34),
		order(35),
	#endif
		{ 0x2929 }
	};

	void check(int loop, unsigned long n)
	{
		unsigned long old, new;

		printf("[%2d]: %09lx | ", loop, n);

		old = old_get_order(n);
		new = get_order(n);

		printf("%3ld, %3ld\n", old, new);
		if (n != 0 && old != new)
			abort();
	}

	int main(int argc, char **argv)
	{
		const struct order *p;
		unsigned long n;
		int loop;

		for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
			n = 1UL << loop;
			check(loop, n - 1);
			check(loop, n);
			check(loop, n + 1);
		}

		for (p = order_table; p->n != 0x2929; p++) {
			unsigned long old, new;

			old = old_get_order(p->n);
			new = p->order;
			printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
			if (p->n != 0 && old != new)
				abort();
		}

		return 0;
	}

Disassembling the x86_64 version of the above code shows:

	0000000000400510 <old_get_order>:
	  400510:       48 83 ef 01             sub    $0x1,%rdi
	  400514:       b8 ff ff ff ff          mov    $0xffffffff,%eax
	  400519:       48 c1 ef 0b             shr    $0xb,%rdi
	  40051d:       0f 1f 00                nopl   (%rax)
	  400520:       83 c0 01                add    $0x1,%eax
	  400523:       48 d1 ef                shr    %rdi
	  400526:       75 f8                   jne    400520 <old_get_order+0x10>
	  400528:       f3 c3                   repz retq
	  40052a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

	0000000000400530 <__get_order>:
	  400530:       48 83 ef 01             sub    $0x1,%rdi
	  400534:       48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
	  40053b:       48 c1 ef 0c             shr    $0xc,%rdi
	  40053f:       48 0f bd c7             bsr    %rdi,%rax
	  400543:       83 c0 01                add    $0x1,%eax
	  400546:       c3                      retq
	  400547:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
	  40054e:       00 00

As can be seen, the new __get_order() function is simpler than the
old_get_order() function.

Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-generic/getorder.h |   40 ++++++++++++++++++++++++++++------------
 1 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 76e9687..e0fb4bf 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -4,6 +4,25 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/compiler.h>
+#include <linux/log2.h>
+
+/*
+ * Runtime evaluation of get_order()
+ */
+static inline __attribute_const__
+int __get_order(unsigned long size)
+{
+	int order;
+
+	size--;
+	size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+	order = fls(size);
+#else
+	order = fls64(size);
+#endif
+	return order;
+}
 
 /**
  * get_order - Determine the allocation order of a memory size
@@ -27,18 +46,15 @@
  * This function may be used to initialise variables with compile time
  * evaluations of constants.
  */
-static inline __attribute_const__ int get_order(unsigned long size)
-{
-	int order;
-
-	size = (size - 1) >> (PAGE_SHIFT - 1);
-	order = -1;
-	do {
-		size >>= 1;
-		order++;
-	} while (size);
-	return order;
-}
+#define get_order(n)						\
+(								\
+	__builtin_constant_p(n) ? (				\
+		(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :	\
+		((n < (1UL << PAGE_SHIFT)) ? 0 :		\
+		 ilog2((n) - 1) - PAGE_SHIFT + 1)		\
+	) :							\
+	__get_order(n)						\
+)
 
 #endif	/* __ASSEMBLY__ */
 

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [tip:x86/asm] bitops: Optimise get_order()
  2012-02-20 23:20   ` [tip:x86/asm] bitops: " tip-bot for David Howells
@ 2012-02-29 20:29       ` Paul Gortmaker
  0 siblings, 0 replies; 9+ messages in thread
From: Paul Gortmaker @ 2012-02-29 20:29 UTC (permalink / raw)
  To: mingo, hpa, linux-kernel, arnd, dhowells, tglx
  Cc: linux-tip-commits, linux-next

On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
<dhowells@redhat.com> wrote:
> Commit-ID:  d66acc39c7cee323733c8503b9de1821a56dff7e
> Gitweb:     http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> Author:     David Howells <dhowells@redhat.com>
> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> Committer:  H. Peter Anvin <hpa@zytor.com>
> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>
> bitops: Optimise get_order()

This is causing build failures on non-x86 in linux next according to git bisect.

Here is one example:

http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/

Paul.
--

>
> Optimise get_order() to use bit scanning instructions if such exist rather than
> a loop.  Also, make it possible to use get_order() in static initialisations
> too by building it on top of ilog2() in the constant parameter case.
>
> This has been tested for i386 and x86_64 using the following userspace program,
> and for FRV by making appropriate substitutions for fls() and fls64().  It will
> abort if the case for get_order() deviates from the original except for the
> order of 0, for which get_order() produces an undefined result.  This program
> tests both dynamic and static parameters.
>
>        #include <stdlib.h>
>        #include <stdio.h>
>
>        #ifdef __x86_64__
>        #define BITS_PER_LONG 64
>        #else
>        #define BITS_PER_LONG 32
>        #endif
>
>        #define PAGE_SHIFT 12
>
>        typedef unsigned long long __u64, u64;
>        typedef unsigned int __u32, u32;
>        #define noinline        __attribute__((noinline))
>
>        static inline int fls(int x)
>        {
>                int bitpos = -1;
>
>                asm("bsrl %1,%0"
>                    : "+r" (bitpos)
>                    : "rm" (x));
>                return bitpos + 1;
>        }
>
>        static __always_inline int fls64(__u64 x)
>        {
>        #if BITS_PER_LONG == 64
>                long bitpos = -1;
>
>                asm("bsrq %1,%0"
>                    : "+r" (bitpos)
>                    : "rm" (x));
>                return bitpos + 1;
>        #else
>                __u32 h = x >> 32, l = x;
>                int bitpos = -1;
>
>                asm("bsrl       %1,%0   \n"
>                    "subl       %2,%0   \n"
>                    "bsrl       %3,%0   \n"
>                    : "+r" (bitpos)
>                    : "rm" (l), "i"(32), "rm" (h));
>
>                return bitpos + 33;
>        #endif
>        }
>
>        static inline __attribute__((const))
>        int __ilog2_u32(u32 n)
>        {
>                return fls(n) - 1;
>        }
>
>        static inline __attribute__((const))
>        int __ilog2_u64(u64 n)
>        {
>                return fls64(n) - 1;
>        }
>
>        extern __attribute__((const, noreturn))
>        int ____ilog2_NaN(void);
>
>        #define ilog2(n)                                \
>        (                                               \
>                __builtin_constant_p(n) ? (             \
>                        (n) < 1 ? ____ilog2_NaN() :     \
>                        (n) & (1ULL << 63) ? 63 :       \
>                        (n) & (1ULL << 62) ? 62 :       \
>                        (n) & (1ULL << 61) ? 61 :       \
>                        (n) & (1ULL << 60) ? 60 :       \
>                        (n) & (1ULL << 59) ? 59 :       \
>                        (n) & (1ULL << 58) ? 58 :       \
>                        (n) & (1ULL << 57) ? 57 :       \
>                        (n) & (1ULL << 56) ? 56 :       \
>                        (n) & (1ULL << 55) ? 55 :       \
>                        (n) & (1ULL << 54) ? 54 :       \
>                        (n) & (1ULL << 53) ? 53 :       \
>                        (n) & (1ULL << 52) ? 52 :       \
>                        (n) & (1ULL << 51) ? 51 :       \
>                        (n) & (1ULL << 50) ? 50 :       \
>                        (n) & (1ULL << 49) ? 49 :       \
>                        (n) & (1ULL << 48) ? 48 :       \
>                        (n) & (1ULL << 47) ? 47 :       \
>                        (n) & (1ULL << 46) ? 46 :       \
>                        (n) & (1ULL << 45) ? 45 :       \
>                        (n) & (1ULL << 44) ? 44 :       \
>                        (n) & (1ULL << 43) ? 43 :       \
>                        (n) & (1ULL << 42) ? 42 :       \
>                        (n) & (1ULL << 41) ? 41 :       \
>                        (n) & (1ULL << 40) ? 40 :       \
>                        (n) & (1ULL << 39) ? 39 :       \
>                        (n) & (1ULL << 38) ? 38 :       \
>                        (n) & (1ULL << 37) ? 37 :       \
>                        (n) & (1ULL << 36) ? 36 :       \
>                        (n) & (1ULL << 35) ? 35 :       \
>                        (n) & (1ULL << 34) ? 34 :       \
>                        (n) & (1ULL << 33) ? 33 :       \
>                        (n) & (1ULL << 32) ? 32 :       \
>                        (n) & (1ULL << 31) ? 31 :       \
>                        (n) & (1ULL << 30) ? 30 :       \
>                        (n) & (1ULL << 29) ? 29 :       \
>                        (n) & (1ULL << 28) ? 28 :       \
>                        (n) & (1ULL << 27) ? 27 :       \
>                        (n) & (1ULL << 26) ? 26 :       \
>                        (n) & (1ULL << 25) ? 25 :       \
>                        (n) & (1ULL << 24) ? 24 :       \
>                        (n) & (1ULL << 23) ? 23 :       \
>                        (n) & (1ULL << 22) ? 22 :       \
>                        (n) & (1ULL << 21) ? 21 :       \
>                        (n) & (1ULL << 20) ? 20 :       \
>                        (n) & (1ULL << 19) ? 19 :       \
>                        (n) & (1ULL << 18) ? 18 :       \
>                        (n) & (1ULL << 17) ? 17 :       \
>                        (n) & (1ULL << 16) ? 16 :       \
>                        (n) & (1ULL << 15) ? 15 :       \
>                        (n) & (1ULL << 14) ? 14 :       \
>                        (n) & (1ULL << 13) ? 13 :       \
>                        (n) & (1ULL << 12) ? 12 :       \
>                        (n) & (1ULL << 11) ? 11 :       \
>                        (n) & (1ULL << 10) ? 10 :       \
>                        (n) & (1ULL <<  9) ?  9 :       \
>                        (n) & (1ULL <<  8) ?  8 :       \
>                        (n) & (1ULL <<  7) ?  7 :       \
>                        (n) & (1ULL <<  6) ?  6 :       \
>                        (n) & (1ULL <<  5) ?  5 :       \
>                        (n) & (1ULL <<  4) ?  4 :       \
>                        (n) & (1ULL <<  3) ?  3 :       \
>                        (n) & (1ULL <<  2) ?  2 :       \
>                        (n) & (1ULL <<  1) ?  1 :       \
>                        (n) & (1ULL <<  0) ?  0 :       \
>                        ____ilog2_NaN()                 \
>                                           ) :          \
>                (sizeof(n) <= 4) ?                      \
>                __ilog2_u32(n) :                        \
>                __ilog2_u64(n)                          \
>         )
>
>        static noinline __attribute__((const))
>        int old_get_order(unsigned long size)
>        {
>                int order;
>
>                size = (size - 1) >> (PAGE_SHIFT - 1);
>                order = -1;
>                do {
>                        size >>= 1;
>                        order++;
>                } while (size);
>                return order;
>        }
>
>        static noinline __attribute__((const))
>        int __get_order(unsigned long size)
>        {
>                int order;
>                size--;
>                size >>= PAGE_SHIFT;
>        #if BITS_PER_LONG == 32
>                order = fls(size);
>        #else
>                order = fls64(size);
>        #endif
>                return order;
>        }
>
>        #define get_order(n)                                            \
>        (                                                               \
>                __builtin_constant_p(n) ? (                             \
>                        (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :       \
>                        ((n < (1UL << PAGE_SHIFT)) ? 0 :                \
>                         ilog2((n) - 1) - PAGE_SHIFT + 1)               \
>                ) :                                                     \
>                __get_order(n)                                          \
>        )
>
>        #define order(N) \
>                { (1UL << N) - 1,       get_order((1UL << N) - 1)       },      \
>                { (1UL << N),           get_order((1UL << N))           },      \
>                { (1UL << N) + 1,       get_order((1UL << N) + 1)       }
>
>        struct order {
>                unsigned long n, order;
>        };
>
>        static const struct order order_table[] = {
>                order(0),
>                order(1),
>                order(2),
>                order(3),
>                order(4),
>                order(5),
>                order(6),
>                order(7),
>                order(8),
>                order(9),
>                order(10),
>                order(11),
>                order(12),
>                order(13),
>                order(14),
>                order(15),
>                order(16),
>                order(17),
>                order(18),
>                order(19),
>                order(20),
>                order(21),
>                order(22),
>                order(23),
>                order(24),
>                order(25),
>                order(26),
>                order(27),
>                order(28),
>                order(29),
>                order(30),
>                order(31),
>        #if BITS_PER_LONG == 64
>                order(32),
>                order(33),
>                order(34),
>                order(35),
>        #endif
>                { 0x2929 }
>        };
>
>        void check(int loop, unsigned long n)
>        {
>                unsigned long old, new;
>
>                printf("[%2d]: %09lx | ", loop, n);
>
>                old = old_get_order(n);
>                new = get_order(n);
>
>                printf("%3ld, %3ld\n", old, new);
>                if (n != 0 && old != new)
>                        abort();
>        }
>
>        int main(int argc, char **argv)
>        {
>                const struct order *p;
>                unsigned long n;
>                int loop;
>
>                for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
>                        n = 1UL << loop;
>                        check(loop, n - 1);
>                        check(loop, n);
>                        check(loop, n + 1);
>                }
>
>                for (p = order_table; p->n != 0x2929; p++) {
>                        unsigned long old, new;
>
>                        old = old_get_order(p->n);
>                        new = p->order;
>                        printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
>                        if (p->n != 0 && old != new)
>                                abort();
>                }
>
>                return 0;
>        }
>
> Disassembling the x86_64 version of the above code shows:
>
>        0000000000400510 <old_get_order>:
>          400510:       48 83 ef 01             sub    $0x1,%rdi
>          400514:       b8 ff ff ff ff          mov    $0xffffffff,%eax
>          400519:       48 c1 ef 0b             shr    $0xb,%rdi
>          40051d:       0f 1f 00                nopl   (%rax)
>          400520:       83 c0 01                add    $0x1,%eax
>          400523:       48 d1 ef                shr    %rdi
>          400526:       75 f8                   jne    400520 <old_get_order+0x10>
>          400528:       f3 c3                   repz retq
>          40052a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
>
>        0000000000400530 <__get_order>:
>          400530:       48 83 ef 01             sub    $0x1,%rdi
>          400534:       48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
>          40053b:       48 c1 ef 0c             shr    $0xc,%rdi
>          40053f:       48 0f bd c7             bsr    %rdi,%rax
>          400543:       83 c0 01                add    $0x1,%eax
>          400546:       c3                      retq
>          400547:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
>          40054e:       00 00
>
> As can be seen, the new __get_order() function is simpler than the
> old_get_order() function.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
> ---
>  include/asm-generic/getorder.h |   40 ++++++++++++++++++++++++++++------------
>  1 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
> index 76e9687..e0fb4bf 100644
> --- a/include/asm-generic/getorder.h
> +++ b/include/asm-generic/getorder.h
> @@ -4,6 +4,25 @@
>  #ifndef __ASSEMBLY__
>
>  #include <linux/compiler.h>
> +#include <linux/log2.h>
> +
> +/*
> + * Runtime evaluation of get_order()
> + */
> +static inline __attribute_const__
> +int __get_order(unsigned long size)
> +{
> +       int order;
> +
> +       size--;
> +       size >>= PAGE_SHIFT;
> +#if BITS_PER_LONG == 32
> +       order = fls(size);
> +#else
> +       order = fls64(size);
> +#endif
> +       return order;
> +}
>
>  /**
>  * get_order - Determine the allocation order of a memory size
> @@ -27,18 +46,15 @@
>  * This function may be used to initialise variables with compile time
>  * evaluations of constants.
>  */
> -static inline __attribute_const__ int get_order(unsigned long size)
> -{
> -       int order;
> -
> -       size = (size - 1) >> (PAGE_SHIFT - 1);
> -       order = -1;
> -       do {
> -               size >>= 1;
> -               order++;
> -       } while (size);
> -       return order;
> -}
> +#define get_order(n)                                           \
> +(                                                              \
> +       __builtin_constant_p(n) ? (                             \
> +               (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :       \
> +               ((n < (1UL << PAGE_SHIFT)) ? 0 :                \
> +                ilog2((n) - 1) - PAGE_SHIFT + 1)               \
> +       ) :                                                     \
> +       __get_order(n)                                          \
> +)
>
>  #endif /* __ASSEMBLY__ */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [tip:x86/asm] bitops: Optimise get_order()
@ 2012-02-29 20:29       ` Paul Gortmaker
  0 siblings, 0 replies; 9+ messages in thread
From: Paul Gortmaker @ 2012-02-29 20:29 UTC (permalink / raw)
  To: mingo, hpa, linux-kernel, arnd, dhowells, tglx
  Cc: linux-tip-commits, linux-next

On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
<dhowells@redhat.com> wrote:
> Commit-ID:  d66acc39c7cee323733c8503b9de1821a56dff7e
> Gitweb:     http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> Author:     David Howells <dhowells@redhat.com>
> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> Committer:  H. Peter Anvin <hpa@zytor.com>
> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>
> bitops: Optimise get_order()

This is causing build failures on non-x86 in linux next according to git bisect.

Here is one example:

http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/

Paul.
--

>
> Optimise get_order() to use bit scanning instructions if such exist rather than
> a loop.  Also, make it possible to use get_order() in static initialisations
> too by building it on top of ilog2() in the constant parameter case.
>
> This has been tested for i386 and x86_64 using the following userspace program,
> and for FRV by making appropriate substitutions for fls() and fls64().  It will
> abort if the case for get_order() deviates from the original except for the
> order of 0, for which get_order() produces an undefined result.  This program
> tests both dynamic and static parameters.
>
>        #include <stdlib.h>
>        #include <stdio.h>
>
>        #ifdef __x86_64__
>        #define BITS_PER_LONG 64
>        #else
>        #define BITS_PER_LONG 32
>        #endif
>
>        #define PAGE_SHIFT 12
>
>        typedef unsigned long long __u64, u64;
>        typedef unsigned int __u32, u32;
>        #define noinline        __attribute__((noinline))
>
>        static inline int fls(int x)
>        {
>                int bitpos = -1;
>
>                asm("bsrl %1,%0"
>                    : "+r" (bitpos)
>                    : "rm" (x));
>                return bitpos + 1;
>        }
>
>        static __always_inline int fls64(__u64 x)
>        {
>        #if BITS_PER_LONG == 64
>                long bitpos = -1;
>
>                asm("bsrq %1,%0"
>                    : "+r" (bitpos)
>                    : "rm" (x));
>                return bitpos + 1;
>        #else
>                __u32 h = x >> 32, l = x;
>                int bitpos = -1;
>
>                asm("bsrl       %1,%0   \n"
>                    "subl       %2,%0   \n"
>                    "bsrl       %3,%0   \n"
>                    : "+r" (bitpos)
>                    : "rm" (l), "i"(32), "rm" (h));
>
>                return bitpos + 33;
>        #endif
>        }
>
>        static inline __attribute__((const))
>        int __ilog2_u32(u32 n)
>        {
>                return fls(n) - 1;
>        }
>
>        static inline __attribute__((const))
>        int __ilog2_u64(u64 n)
>        {
>                return fls64(n) - 1;
>        }
>
>        extern __attribute__((const, noreturn))
>        int ____ilog2_NaN(void);
>
>        #define ilog2(n)                                \
>        (                                               \
>                __builtin_constant_p(n) ? (             \
>                        (n) < 1 ? ____ilog2_NaN() :     \
>                        (n) & (1ULL << 63) ? 63 :       \
>                        (n) & (1ULL << 62) ? 62 :       \
>                        (n) & (1ULL << 61) ? 61 :       \
>                        (n) & (1ULL << 60) ? 60 :       \
>                        (n) & (1ULL << 59) ? 59 :       \
>                        (n) & (1ULL << 58) ? 58 :       \
>                        (n) & (1ULL << 57) ? 57 :       \
>                        (n) & (1ULL << 56) ? 56 :       \
>                        (n) & (1ULL << 55) ? 55 :       \
>                        (n) & (1ULL << 54) ? 54 :       \
>                        (n) & (1ULL << 53) ? 53 :       \
>                        (n) & (1ULL << 52) ? 52 :       \
>                        (n) & (1ULL << 51) ? 51 :       \
>                        (n) & (1ULL << 50) ? 50 :       \
>                        (n) & (1ULL << 49) ? 49 :       \
>                        (n) & (1ULL << 48) ? 48 :       \
>                        (n) & (1ULL << 47) ? 47 :       \
>                        (n) & (1ULL << 46) ? 46 :       \
>                        (n) & (1ULL << 45) ? 45 :       \
>                        (n) & (1ULL << 44) ? 44 :       \
>                        (n) & (1ULL << 43) ? 43 :       \
>                        (n) & (1ULL << 42) ? 42 :       \
>                        (n) & (1ULL << 41) ? 41 :       \
>                        (n) & (1ULL << 40) ? 40 :       \
>                        (n) & (1ULL << 39) ? 39 :       \
>                        (n) & (1ULL << 38) ? 38 :       \
>                        (n) & (1ULL << 37) ? 37 :       \
>                        (n) & (1ULL << 36) ? 36 :       \
>                        (n) & (1ULL << 35) ? 35 :       \
>                        (n) & (1ULL << 34) ? 34 :       \
>                        (n) & (1ULL << 33) ? 33 :       \
>                        (n) & (1ULL << 32) ? 32 :       \
>                        (n) & (1ULL << 31) ? 31 :       \
>                        (n) & (1ULL << 30) ? 30 :       \
>                        (n) & (1ULL << 29) ? 29 :       \
>                        (n) & (1ULL << 28) ? 28 :       \
>                        (n) & (1ULL << 27) ? 27 :       \
>                        (n) & (1ULL << 26) ? 26 :       \
>                        (n) & (1ULL << 25) ? 25 :       \
>                        (n) & (1ULL << 24) ? 24 :       \
>                        (n) & (1ULL << 23) ? 23 :       \
>                        (n) & (1ULL << 22) ? 22 :       \
>                        (n) & (1ULL << 21) ? 21 :       \
>                        (n) & (1ULL << 20) ? 20 :       \
>                        (n) & (1ULL << 19) ? 19 :       \
>                        (n) & (1ULL << 18) ? 18 :       \
>                        (n) & (1ULL << 17) ? 17 :       \
>                        (n) & (1ULL << 16) ? 16 :       \
>                        (n) & (1ULL << 15) ? 15 :       \
>                        (n) & (1ULL << 14) ? 14 :       \
>                        (n) & (1ULL << 13) ? 13 :       \
>                        (n) & (1ULL << 12) ? 12 :       \
>                        (n) & (1ULL << 11) ? 11 :       \
>                        (n) & (1ULL << 10) ? 10 :       \
>                        (n) & (1ULL <<  9) ?  9 :       \
>                        (n) & (1ULL <<  8) ?  8 :       \
>                        (n) & (1ULL <<  7) ?  7 :       \
>                        (n) & (1ULL <<  6) ?  6 :       \
>                        (n) & (1ULL <<  5) ?  5 :       \
>                        (n) & (1ULL <<  4) ?  4 :       \
>                        (n) & (1ULL <<  3) ?  3 :       \
>                        (n) & (1ULL <<  2) ?  2 :       \
>                        (n) & (1ULL <<  1) ?  1 :       \
>                        (n) & (1ULL <<  0) ?  0 :       \
>                        ____ilog2_NaN()                 \
>                                           ) :          \
>                (sizeof(n) <= 4) ?                      \
>                __ilog2_u32(n) :                        \
>                __ilog2_u64(n)                          \
>         )
>
>        static noinline __attribute__((const))
>        int old_get_order(unsigned long size)
>        {
>                int order;
>
>                size = (size - 1) >> (PAGE_SHIFT - 1);
>                order = -1;
>                do {
>                        size >>= 1;
>                        order++;
>                } while (size);
>                return order;
>        }
>
>        static noinline __attribute__((const))
>        int __get_order(unsigned long size)
>        {
>                int order;
>                size--;
>                size >>= PAGE_SHIFT;
>        #if BITS_PER_LONG == 32
>                order = fls(size);
>        #else
>                order = fls64(size);
>        #endif
>                return order;
>        }
>
>        #define get_order(n)                                            \
>        (                                                               \
>                __builtin_constant_p(n) ? (                             \
>                        (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :       \
>                        ((n < (1UL << PAGE_SHIFT)) ? 0 :                \
>                         ilog2((n) - 1) - PAGE_SHIFT + 1)               \
>                ) :                                                     \
>                __get_order(n)                                          \
>        )
>
>        #define order(N) \
>                { (1UL << N) - 1,       get_order((1UL << N) - 1)       },      \
>                { (1UL << N),           get_order((1UL << N))           },      \
>                { (1UL << N) + 1,       get_order((1UL << N) + 1)       }
>
>        struct order {
>                unsigned long n, order;
>        };
>
>        static const struct order order_table[] = {
>                order(0),
>                order(1),
>                order(2),
>                order(3),
>                order(4),
>                order(5),
>                order(6),
>                order(7),
>                order(8),
>                order(9),
>                order(10),
>                order(11),
>                order(12),
>                order(13),
>                order(14),
>                order(15),
>                order(16),
>                order(17),
>                order(18),
>                order(19),
>                order(20),
>                order(21),
>                order(22),
>                order(23),
>                order(24),
>                order(25),
>                order(26),
>                order(27),
>                order(28),
>                order(29),
>                order(30),
>                order(31),
>        #if BITS_PER_LONG == 64
>                order(32),
>                order(33),
>                order(34),
>                order(35),
>        #endif
>                { 0x2929 }
>        };
>
>        void check(int loop, unsigned long n)
>        {
>                unsigned long old, new;
>
>                printf("[%2d]: %09lx | ", loop, n);
>
>                old = old_get_order(n);
>                new = get_order(n);
>
>                printf("%3ld, %3ld\n", old, new);
>                if (n != 0 && old != new)
>                        abort();
>        }
>
>        int main(int argc, char **argv)
>        {
>                const struct order *p;
>                unsigned long n;
>                int loop;
>
>                for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
>                        n = 1UL << loop;
>                        check(loop, n - 1);
>                        check(loop, n);
>                        check(loop, n + 1);
>                }
>
>                for (p = order_table; p->n != 0x2929; p++) {
>                        unsigned long old, new;
>
>                        old = old_get_order(p->n);
>                        new = p->order;
>                        printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
>                        if (p->n != 0 && old != new)
>                                abort();
>                }
>
>                return 0;
>        }
>
> Disassembling the x86_64 version of the above code shows:
>
>        0000000000400510 <old_get_order>:
>          400510:       48 83 ef 01             sub    $0x1,%rdi
>          400514:       b8 ff ff ff ff          mov    $0xffffffff,%eax
>          400519:       48 c1 ef 0b             shr    $0xb,%rdi
>          40051d:       0f 1f 00                nopl   (%rax)
>          400520:       83 c0 01                add    $0x1,%eax
>          400523:       48 d1 ef                shr    %rdi
>          400526:       75 f8                   jne    400520 <old_get_order+0x10>
>          400528:       f3 c3                   repz retq
>          40052a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
>
>        0000000000400530 <__get_order>:
>          400530:       48 83 ef 01             sub    $0x1,%rdi
>          400534:       48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
>          40053b:       48 c1 ef 0c             shr    $0xc,%rdi
>          40053f:       48 0f bd c7             bsr    %rdi,%rax
>          400543:       83 c0 01                add    $0x1,%eax
>          400546:       c3                      retq
>          400547:       66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
>          40054e:       00 00
>
> As can be seen, the new __get_order() function is simpler than the
> old_get_order() function.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
> ---
>  include/asm-generic/getorder.h |   40 ++++++++++++++++++++++++++++------------
>  1 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
> index 76e9687..e0fb4bf 100644
> --- a/include/asm-generic/getorder.h
> +++ b/include/asm-generic/getorder.h
> @@ -4,6 +4,25 @@
>  #ifndef __ASSEMBLY__
>
>  #include <linux/compiler.h>
> +#include <linux/log2.h>
> +
> +/*
> + * Runtime evaluation of get_order()
> + */
> +static inline __attribute_const__
> +int __get_order(unsigned long size)
> +{
> +       int order;
> +
> +       size--;
> +       size >>= PAGE_SHIFT;
> +#if BITS_PER_LONG == 32
> +       order = fls(size);
> +#else
> +       order = fls64(size);
> +#endif
> +       return order;
> +}
>
>  /**
>  * get_order - Determine the allocation order of a memory size
> @@ -27,18 +46,15 @@
>  * This function may be used to initialise variables with compile time
>  * evaluations of constants.
>  */
> -static inline __attribute_const__ int get_order(unsigned long size)
> -{
> -       int order;
> -
> -       size = (size - 1) >> (PAGE_SHIFT - 1);
> -       order = -1;
> -       do {
> -               size >>= 1;
> -               order++;
> -       } while (size);
> -       return order;
> -}
> +#define get_order(n)                                           \
> +(                                                              \
> +       __builtin_constant_p(n) ? (                             \
> +               (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :       \
> +               ((n < (1UL << PAGE_SHIFT)) ? 0 :                \
> +                ilog2((n) - 1) - PAGE_SHIFT + 1)               \
> +       ) :                                                     \
> +       __get_order(n)                                          \
> +)
>
>  #endif /* __ASSEMBLY__ */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [tip:x86/asm] bitops: Optimise get_order()
  2012-02-29 20:29       ` Paul Gortmaker
  (?)
@ 2012-03-01  0:31       ` Stephen Rothwell
  2012-03-01 21:02         ` H. Peter Anvin
  2012-03-06 14:24         ` David Howells
  -1 siblings, 2 replies; 9+ messages in thread
From: Stephen Rothwell @ 2012-03-01  0:31 UTC (permalink / raw)
  To: Paul Gortmaker
  Cc: mingo, hpa, linux-kernel, arnd, dhowells, tglx, linux-tip-commits,
	linux-next

[-- Attachment #1: Type: text/plain, Size: 1199 bytes --]

On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
>
> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
> <dhowells@redhat.com> wrote:
> > Commit-ID:  d66acc39c7cee323733c8503b9de1821a56dff7e
> > Gitweb:     http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> > Author:     David Howells <dhowells@redhat.com>
> > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> > Committer:  H. Peter Anvin <hpa@zytor.com>
> > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
> >
> > bitops: Optimise get_order()
> 
> This is causing build failures on non-x86 in linux next according to git bisect.

Presumably it needs to include linux/bitops.h (and see below).

> > +static inline __attribute_const__
> > +int __get_order(unsigned long size)
> > +{
> > +       int order;
> > +
> > +       size--;
> > +       size >>= PAGE_SHIFT;
> > +#if BITS_PER_LONG == 32
> > +       order = fls(size);
> > +#else
> > +       order = fls64(size);
> > +#endif

linux/bitops.h has fls_long() that does this size test and calls the right thing.

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [tip:x86/asm] bitops: Optimise get_order()
  2012-03-01  0:31       ` Stephen Rothwell
@ 2012-03-01 21:02         ` H. Peter Anvin
  2012-03-06 14:24         ` David Howells
  1 sibling, 0 replies; 9+ messages in thread
From: H. Peter Anvin @ 2012-03-01 21:02 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: Paul Gortmaker, mingo, linux-kernel, arnd, dhowells, tglx,
	linux-tip-commits, linux-next

David, could you make a fix for this?

	-hpa

On 02/29/2012 04:31 PM, Stephen Rothwell wrote:
> On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker<paul.gortmaker@windriver.com>  wrote:
>>
>> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
>> <dhowells@redhat.com>  wrote:
>>> Commit-ID:  d66acc39c7cee323733c8503b9de1821a56dff7e
>>> Gitweb:     http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
>>> Author:     David Howells<dhowells@redhat.com>
>>> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
>>> Committer:  H. Peter Anvin<hpa@zytor.com>
>>> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>>>
>>> bitops: Optimise get_order()
>>
>> This is causing build failures on non-x86 in linux next according to git bisect.
>
> Presumably it needs to include linux/bitops.h (and see below).
>
>>> +static inline __attribute_const__
>>> +int __get_order(unsigned long size)
>>> +{
>>> +       int order;
>>> +
>>> +       size--;
>>> +       size>>= PAGE_SHIFT;
>>> +#if BITS_PER_LONG == 32
>>> +       order = fls(size);
>>> +#else
>>> +       order = fls64(size);
>>> +#endif
>
> linux/bitops.h has fls_long() that does this size test and calls the right thing.
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [tip:x86/asm] bitops: Optimise get_order()
  2012-03-01  0:31       ` Stephen Rothwell
  2012-03-01 21:02         ` H. Peter Anvin
@ 2012-03-06 14:24         ` David Howells
  1 sibling, 0 replies; 9+ messages in thread
From: David Howells @ 2012-03-06 14:24 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: dhowells, Paul Gortmaker, mingo, hpa, linux-kernel, arnd, tglx,
	linux-tip-commits, linux-next

Stephen Rothwell <sfr@canb.auug.org.au> wrote:

> Presumably it needs to include linux/bitops.h (and see below).

That won't help.  It already includes that via linux/log2.h.

The error chain looks suspiciously like a circular dependency has occurred.

David

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2012-03-06 14:25 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
2012-02-20 23:20   ` [tip:x86/asm] bitops: " tip-bot for David Howells
2012-02-29 20:29     ` Paul Gortmaker
2012-02-29 20:29       ` Paul Gortmaker
2012-03-01  0:31       ` Stephen Rothwell
2012-03-01 21:02         ` H. Peter Anvin
2012-03-06 14:24         ` David Howells
2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.