* [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case
@ 2012-02-20 22:39 David Howells
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
0 siblings, 2 replies; 9+ messages in thread
From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw)
To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann
Adjust the comment on get_order() to note that the result of passing a size of
0 results in an undefined value.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
include/asm-generic/getorder.h | 23 ++++++++++++++++++++++-
1 files changed, 22 insertions(+), 1 deletions(-)
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 67e7245..76e9687 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -5,7 +5,28 @@
#include <linux/compiler.h>
-/* Pure 2^n version of get_order */
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory. This
+ * is on a logarithmic scale, where:
+ *
+ * 0 -> 2^0 * PAGE_SIZE and below
+ * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ * ...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ *
+ * This function may be used to initialise variables with compile time
+ * evaluations of constants.
+ */
static inline __attribute_const__ int get_order(unsigned long size)
{
int order;
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] Optimise get_order()
2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
@ 2012-02-20 22:39 ` David Howells
2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells
2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
1 sibling, 1 reply; 9+ messages in thread
From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw)
To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann
Optimise get_order() to use bit scanning instructions if such exist rather than
a loop. Also, make it possible to use get_order() in static initialisations
too by building it on top of ilog2() in the constant parameter case.
This has been tested for i386 and x86_64 using the following userspace program,
and for FRV by making appropriate substitutions for fls() and fls64(). It will
abort if the case for get_order() deviates from the original except for the
order of 0, for which get_order() produces an undefined result. This program
tests both dynamic and static parameters.
#include <stdlib.h>
#include <stdio.h>
#ifdef __x86_64__
#define BITS_PER_LONG 64
#else
#define BITS_PER_LONG 32
#endif
#define PAGE_SHIFT 12
typedef unsigned long long __u64, u64;
typedef unsigned int __u32, u32;
#define noinline __attribute__((noinline))
static inline int fls(int x)
{
int bitpos = -1;
asm("bsrl %1,%0"
: "+r" (bitpos)
: "rm" (x));
return bitpos + 1;
}
static __always_inline int fls64(__u64 x)
{
#if BITS_PER_LONG == 64
long bitpos = -1;
asm("bsrq %1,%0"
: "+r" (bitpos)
: "rm" (x));
return bitpos + 1;
#else
__u32 h = x >> 32, l = x;
int bitpos = -1;
asm("bsrl %1,%0 \n"
"subl %2,%0 \n"
"bsrl %3,%0 \n"
: "+r" (bitpos)
: "rm" (l), "i"(32), "rm" (h));
return bitpos + 33;
#endif
}
static inline __attribute__((const))
int __ilog2_u32(u32 n)
{
return fls(n) - 1;
}
static inline __attribute__((const))
int __ilog2_u64(u64 n)
{
return fls64(n) - 1;
}
extern __attribute__((const, noreturn))
int ____ilog2_NaN(void);
#define ilog2(n) \
( \
__builtin_constant_p(n) ? ( \
(n) < 1 ? ____ilog2_NaN() : \
(n) & (1ULL << 63) ? 63 : \
(n) & (1ULL << 62) ? 62 : \
(n) & (1ULL << 61) ? 61 : \
(n) & (1ULL << 60) ? 60 : \
(n) & (1ULL << 59) ? 59 : \
(n) & (1ULL << 58) ? 58 : \
(n) & (1ULL << 57) ? 57 : \
(n) & (1ULL << 56) ? 56 : \
(n) & (1ULL << 55) ? 55 : \
(n) & (1ULL << 54) ? 54 : \
(n) & (1ULL << 53) ? 53 : \
(n) & (1ULL << 52) ? 52 : \
(n) & (1ULL << 51) ? 51 : \
(n) & (1ULL << 50) ? 50 : \
(n) & (1ULL << 49) ? 49 : \
(n) & (1ULL << 48) ? 48 : \
(n) & (1ULL << 47) ? 47 : \
(n) & (1ULL << 46) ? 46 : \
(n) & (1ULL << 45) ? 45 : \
(n) & (1ULL << 44) ? 44 : \
(n) & (1ULL << 43) ? 43 : \
(n) & (1ULL << 42) ? 42 : \
(n) & (1ULL << 41) ? 41 : \
(n) & (1ULL << 40) ? 40 : \
(n) & (1ULL << 39) ? 39 : \
(n) & (1ULL << 38) ? 38 : \
(n) & (1ULL << 37) ? 37 : \
(n) & (1ULL << 36) ? 36 : \
(n) & (1ULL << 35) ? 35 : \
(n) & (1ULL << 34) ? 34 : \
(n) & (1ULL << 33) ? 33 : \
(n) & (1ULL << 32) ? 32 : \
(n) & (1ULL << 31) ? 31 : \
(n) & (1ULL << 30) ? 30 : \
(n) & (1ULL << 29) ? 29 : \
(n) & (1ULL << 28) ? 28 : \
(n) & (1ULL << 27) ? 27 : \
(n) & (1ULL << 26) ? 26 : \
(n) & (1ULL << 25) ? 25 : \
(n) & (1ULL << 24) ? 24 : \
(n) & (1ULL << 23) ? 23 : \
(n) & (1ULL << 22) ? 22 : \
(n) & (1ULL << 21) ? 21 : \
(n) & (1ULL << 20) ? 20 : \
(n) & (1ULL << 19) ? 19 : \
(n) & (1ULL << 18) ? 18 : \
(n) & (1ULL << 17) ? 17 : \
(n) & (1ULL << 16) ? 16 : \
(n) & (1ULL << 15) ? 15 : \
(n) & (1ULL << 14) ? 14 : \
(n) & (1ULL << 13) ? 13 : \
(n) & (1ULL << 12) ? 12 : \
(n) & (1ULL << 11) ? 11 : \
(n) & (1ULL << 10) ? 10 : \
(n) & (1ULL << 9) ? 9 : \
(n) & (1ULL << 8) ? 8 : \
(n) & (1ULL << 7) ? 7 : \
(n) & (1ULL << 6) ? 6 : \
(n) & (1ULL << 5) ? 5 : \
(n) & (1ULL << 4) ? 4 : \
(n) & (1ULL << 3) ? 3 : \
(n) & (1ULL << 2) ? 2 : \
(n) & (1ULL << 1) ? 1 : \
(n) & (1ULL << 0) ? 0 : \
____ilog2_NaN() \
) : \
(sizeof(n) <= 4) ? \
__ilog2_u32(n) : \
__ilog2_u64(n) \
)
static noinline __attribute__((const))
int old_get_order(unsigned long size)
{
int order;
size = (size - 1) >> (PAGE_SHIFT - 1);
order = -1;
do {
size >>= 1;
order++;
} while (size);
return order;
}
static noinline __attribute__((const))
int __get_order(unsigned long size)
{
int order;
size--;
size >>= PAGE_SHIFT;
#if BITS_PER_LONG == 32
order = fls(size);
#else
order = fls64(size);
#endif
return order;
}
#define get_order(n) \
( \
__builtin_constant_p(n) ? ( \
(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
((n < (1UL << PAGE_SHIFT)) ? 0 : \
ilog2((n) - 1) - PAGE_SHIFT + 1) \
) : \
__get_order(n) \
)
#define order(N) \
{ (1UL << N) - 1, get_order((1UL << N) - 1) }, \
{ (1UL << N), get_order((1UL << N)) }, \
{ (1UL << N) + 1, get_order((1UL << N) + 1) }
struct order {
unsigned long n, order;
};
static const struct order order_table[] = {
order(0),
order(1),
order(2),
order(3),
order(4),
order(5),
order(6),
order(7),
order(8),
order(9),
order(10),
order(11),
order(12),
order(13),
order(14),
order(15),
order(16),
order(17),
order(18),
order(19),
order(20),
order(21),
order(22),
order(23),
order(24),
order(25),
order(26),
order(27),
order(28),
order(29),
order(30),
order(31),
#if BITS_PER_LONG == 64
order(32),
order(33),
order(34),
order(35),
#endif
{ 0x2929 }
};
void check(int loop, unsigned long n)
{
unsigned long old, new;
printf("[%2d]: %09lx | ", loop, n);
old = old_get_order(n);
new = get_order(n);
printf("%3ld, %3ld\n", old, new);
if (n != 0 && old != new)
abort();
}
int main(int argc, char **argv)
{
const struct order *p;
unsigned long n;
int loop;
for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
n = 1UL << loop;
check(loop, n - 1);
check(loop, n);
check(loop, n + 1);
}
for (p = order_table; p->n != 0x2929; p++) {
unsigned long old, new;
old = old_get_order(p->n);
new = p->order;
printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
if (p->n != 0 && old != new)
abort();
}
return 0;
}
Disassembling the x86_64 version of the above code shows:
0000000000400510 <old_get_order>:
400510: 48 83 ef 01 sub $0x1,%rdi
400514: b8 ff ff ff ff mov $0xffffffff,%eax
400519: 48 c1 ef 0b shr $0xb,%rdi
40051d: 0f 1f 00 nopl (%rax)
400520: 83 c0 01 add $0x1,%eax
400523: 48 d1 ef shr %rdi
400526: 75 f8 jne 400520 <old_get_order+0x10>
400528: f3 c3 repz retq
40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0000000000400530 <__get_order>:
400530: 48 83 ef 01 sub $0x1,%rdi
400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
40053b: 48 c1 ef 0c shr $0xc,%rdi
40053f: 48 0f bd c7 bsr %rdi,%rax
400543: 83 c0 01 add $0x1,%eax
400546: c3 retq
400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
40054e: 00 00
As can be seen, the new __get_order() function is simpler than the
old_get_order() function.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------
1 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 76e9687..e0fb4bf 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -4,6 +4,25 @@
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
+#include <linux/log2.h>
+
+/*
+ * Runtime evaluation of get_order()
+ */
+static inline __attribute_const__
+int __get_order(unsigned long size)
+{
+ int order;
+
+ size--;
+ size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+ order = fls(size);
+#else
+ order = fls64(size);
+#endif
+ return order;
+}
/**
* get_order - Determine the allocation order of a memory size
@@ -27,18 +46,15 @@
* This function may be used to initialise variables with compile time
* evaluations of constants.
*/
-static inline __attribute_const__ int get_order(unsigned long size)
-{
- int order;
-
- size = (size - 1) >> (PAGE_SHIFT - 1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
-}
+#define get_order(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
+ ((n < (1UL << PAGE_SHIFT)) ? 0 : \
+ ilog2((n) - 1) - PAGE_SHIFT + 1) \
+ ) : \
+ __get_order(n) \
+)
#endif /* __ASSEMBLY__ */
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case
2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
@ 2012-02-20 23:19 ` tip-bot for David Howells
1 sibling, 0 replies; 9+ messages in thread
From: tip-bot for David Howells @ 2012-02-20 23:19 UTC (permalink / raw)
To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx
Commit-ID: e0891a9816316b5e05fd5b0453ffe9fd6a56f489
Gitweb: http://git.kernel.org/tip/e0891a9816316b5e05fd5b0453ffe9fd6a56f489
Author: David Howells <dhowells@redhat.com>
AuthorDate: Mon, 20 Feb 2012 22:39:18 +0000
Committer: H. Peter Anvin <hpa@zytor.com>
CommitDate: Mon, 20 Feb 2012 14:46:55 -0800
bitops: Adjust the comment on get_order() to describe the size==0 case
Adjust the comment on get_order() to note that the result of passing a size of
0 results in an undefined value.
Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120220223917.16199.9416.stgit@warthog.procyon.org.uk
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
include/asm-generic/getorder.h | 23 ++++++++++++++++++++++-
1 files changed, 22 insertions(+), 1 deletions(-)
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 67e7245..76e9687 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -5,7 +5,28 @@
#include <linux/compiler.h>
-/* Pure 2^n version of get_order */
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory. This
+ * is on a logarithmic scale, where:
+ *
+ * 0 -> 2^0 * PAGE_SIZE and below
+ * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ * ...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ *
+ * This function may be used to initialise variables with compile time
+ * evaluations of constants.
+ */
static inline __attribute_const__ int get_order(unsigned long size)
{
int order;
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [tip:x86/asm] bitops: Optimise get_order()
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
@ 2012-02-20 23:20 ` tip-bot for David Howells
2012-02-29 20:29 ` Paul Gortmaker
0 siblings, 1 reply; 9+ messages in thread
From: tip-bot for David Howells @ 2012-02-20 23:20 UTC (permalink / raw)
To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx
Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e
Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
Author: David Howells <dhowells@redhat.com>
AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
Committer: H. Peter Anvin <hpa@zytor.com>
CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
bitops: Optimise get_order()
Optimise get_order() to use bit scanning instructions if such exist rather than
a loop. Also, make it possible to use get_order() in static initialisations
too by building it on top of ilog2() in the constant parameter case.
This has been tested for i386 and x86_64 using the following userspace program,
and for FRV by making appropriate substitutions for fls() and fls64(). It will
abort if the case for get_order() deviates from the original except for the
order of 0, for which get_order() produces an undefined result. This program
tests both dynamic and static parameters.
#include <stdlib.h>
#include <stdio.h>
#ifdef __x86_64__
#define BITS_PER_LONG 64
#else
#define BITS_PER_LONG 32
#endif
#define PAGE_SHIFT 12
typedef unsigned long long __u64, u64;
typedef unsigned int __u32, u32;
#define noinline __attribute__((noinline))
static inline int fls(int x)
{
int bitpos = -1;
asm("bsrl %1,%0"
: "+r" (bitpos)
: "rm" (x));
return bitpos + 1;
}
static __always_inline int fls64(__u64 x)
{
#if BITS_PER_LONG == 64
long bitpos = -1;
asm("bsrq %1,%0"
: "+r" (bitpos)
: "rm" (x));
return bitpos + 1;
#else
__u32 h = x >> 32, l = x;
int bitpos = -1;
asm("bsrl %1,%0 \n"
"subl %2,%0 \n"
"bsrl %3,%0 \n"
: "+r" (bitpos)
: "rm" (l), "i"(32), "rm" (h));
return bitpos + 33;
#endif
}
static inline __attribute__((const))
int __ilog2_u32(u32 n)
{
return fls(n) - 1;
}
static inline __attribute__((const))
int __ilog2_u64(u64 n)
{
return fls64(n) - 1;
}
extern __attribute__((const, noreturn))
int ____ilog2_NaN(void);
#define ilog2(n) \
( \
__builtin_constant_p(n) ? ( \
(n) < 1 ? ____ilog2_NaN() : \
(n) & (1ULL << 63) ? 63 : \
(n) & (1ULL << 62) ? 62 : \
(n) & (1ULL << 61) ? 61 : \
(n) & (1ULL << 60) ? 60 : \
(n) & (1ULL << 59) ? 59 : \
(n) & (1ULL << 58) ? 58 : \
(n) & (1ULL << 57) ? 57 : \
(n) & (1ULL << 56) ? 56 : \
(n) & (1ULL << 55) ? 55 : \
(n) & (1ULL << 54) ? 54 : \
(n) & (1ULL << 53) ? 53 : \
(n) & (1ULL << 52) ? 52 : \
(n) & (1ULL << 51) ? 51 : \
(n) & (1ULL << 50) ? 50 : \
(n) & (1ULL << 49) ? 49 : \
(n) & (1ULL << 48) ? 48 : \
(n) & (1ULL << 47) ? 47 : \
(n) & (1ULL << 46) ? 46 : \
(n) & (1ULL << 45) ? 45 : \
(n) & (1ULL << 44) ? 44 : \
(n) & (1ULL << 43) ? 43 : \
(n) & (1ULL << 42) ? 42 : \
(n) & (1ULL << 41) ? 41 : \
(n) & (1ULL << 40) ? 40 : \
(n) & (1ULL << 39) ? 39 : \
(n) & (1ULL << 38) ? 38 : \
(n) & (1ULL << 37) ? 37 : \
(n) & (1ULL << 36) ? 36 : \
(n) & (1ULL << 35) ? 35 : \
(n) & (1ULL << 34) ? 34 : \
(n) & (1ULL << 33) ? 33 : \
(n) & (1ULL << 32) ? 32 : \
(n) & (1ULL << 31) ? 31 : \
(n) & (1ULL << 30) ? 30 : \
(n) & (1ULL << 29) ? 29 : \
(n) & (1ULL << 28) ? 28 : \
(n) & (1ULL << 27) ? 27 : \
(n) & (1ULL << 26) ? 26 : \
(n) & (1ULL << 25) ? 25 : \
(n) & (1ULL << 24) ? 24 : \
(n) & (1ULL << 23) ? 23 : \
(n) & (1ULL << 22) ? 22 : \
(n) & (1ULL << 21) ? 21 : \
(n) & (1ULL << 20) ? 20 : \
(n) & (1ULL << 19) ? 19 : \
(n) & (1ULL << 18) ? 18 : \
(n) & (1ULL << 17) ? 17 : \
(n) & (1ULL << 16) ? 16 : \
(n) & (1ULL << 15) ? 15 : \
(n) & (1ULL << 14) ? 14 : \
(n) & (1ULL << 13) ? 13 : \
(n) & (1ULL << 12) ? 12 : \
(n) & (1ULL << 11) ? 11 : \
(n) & (1ULL << 10) ? 10 : \
(n) & (1ULL << 9) ? 9 : \
(n) & (1ULL << 8) ? 8 : \
(n) & (1ULL << 7) ? 7 : \
(n) & (1ULL << 6) ? 6 : \
(n) & (1ULL << 5) ? 5 : \
(n) & (1ULL << 4) ? 4 : \
(n) & (1ULL << 3) ? 3 : \
(n) & (1ULL << 2) ? 2 : \
(n) & (1ULL << 1) ? 1 : \
(n) & (1ULL << 0) ? 0 : \
____ilog2_NaN() \
) : \
(sizeof(n) <= 4) ? \
__ilog2_u32(n) : \
__ilog2_u64(n) \
)
static noinline __attribute__((const))
int old_get_order(unsigned long size)
{
int order;
size = (size - 1) >> (PAGE_SHIFT - 1);
order = -1;
do {
size >>= 1;
order++;
} while (size);
return order;
}
static noinline __attribute__((const))
int __get_order(unsigned long size)
{
int order;
size--;
size >>= PAGE_SHIFT;
#if BITS_PER_LONG == 32
order = fls(size);
#else
order = fls64(size);
#endif
return order;
}
#define get_order(n) \
( \
__builtin_constant_p(n) ? ( \
(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
((n < (1UL << PAGE_SHIFT)) ? 0 : \
ilog2((n) - 1) - PAGE_SHIFT + 1) \
) : \
__get_order(n) \
)
#define order(N) \
{ (1UL << N) - 1, get_order((1UL << N) - 1) }, \
{ (1UL << N), get_order((1UL << N)) }, \
{ (1UL << N) + 1, get_order((1UL << N) + 1) }
struct order {
unsigned long n, order;
};
static const struct order order_table[] = {
order(0),
order(1),
order(2),
order(3),
order(4),
order(5),
order(6),
order(7),
order(8),
order(9),
order(10),
order(11),
order(12),
order(13),
order(14),
order(15),
order(16),
order(17),
order(18),
order(19),
order(20),
order(21),
order(22),
order(23),
order(24),
order(25),
order(26),
order(27),
order(28),
order(29),
order(30),
order(31),
#if BITS_PER_LONG == 64
order(32),
order(33),
order(34),
order(35),
#endif
{ 0x2929 }
};
void check(int loop, unsigned long n)
{
unsigned long old, new;
printf("[%2d]: %09lx | ", loop, n);
old = old_get_order(n);
new = get_order(n);
printf("%3ld, %3ld\n", old, new);
if (n != 0 && old != new)
abort();
}
int main(int argc, char **argv)
{
const struct order *p;
unsigned long n;
int loop;
for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
n = 1UL << loop;
check(loop, n - 1);
check(loop, n);
check(loop, n + 1);
}
for (p = order_table; p->n != 0x2929; p++) {
unsigned long old, new;
old = old_get_order(p->n);
new = p->order;
printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
if (p->n != 0 && old != new)
abort();
}
return 0;
}
Disassembling the x86_64 version of the above code shows:
0000000000400510 <old_get_order>:
400510: 48 83 ef 01 sub $0x1,%rdi
400514: b8 ff ff ff ff mov $0xffffffff,%eax
400519: 48 c1 ef 0b shr $0xb,%rdi
40051d: 0f 1f 00 nopl (%rax)
400520: 83 c0 01 add $0x1,%eax
400523: 48 d1 ef shr %rdi
400526: 75 f8 jne 400520 <old_get_order+0x10>
400528: f3 c3 repz retq
40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0000000000400530 <__get_order>:
400530: 48 83 ef 01 sub $0x1,%rdi
400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
40053b: 48 c1 ef 0c shr $0xc,%rdi
40053f: 48 0f bd c7 bsr %rdi,%rax
400543: 83 c0 01 add $0x1,%eax
400546: c3 retq
400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
40054e: 00 00
As can be seen, the new __get_order() function is simpler than the
old_get_order() function.
Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------
1 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 76e9687..e0fb4bf 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -4,6 +4,25 @@
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
+#include <linux/log2.h>
+
+/*
+ * Runtime evaluation of get_order()
+ */
+static inline __attribute_const__
+int __get_order(unsigned long size)
+{
+ int order;
+
+ size--;
+ size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+ order = fls(size);
+#else
+ order = fls64(size);
+#endif
+ return order;
+}
/**
* get_order - Determine the allocation order of a memory size
@@ -27,18 +46,15 @@
* This function may be used to initialise variables with compile time
* evaluations of constants.
*/
-static inline __attribute_const__ int get_order(unsigned long size)
-{
- int order;
-
- size = (size - 1) >> (PAGE_SHIFT - 1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
-}
+#define get_order(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
+ ((n < (1UL << PAGE_SHIFT)) ? 0 : \
+ ilog2((n) - 1) - PAGE_SHIFT + 1) \
+ ) : \
+ __get_order(n) \
+)
#endif /* __ASSEMBLY__ */
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order()
2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells
@ 2012-02-29 20:29 ` Paul Gortmaker
0 siblings, 0 replies; 9+ messages in thread
From: Paul Gortmaker @ 2012-02-29 20:29 UTC (permalink / raw)
To: mingo, hpa, linux-kernel, arnd, dhowells, tglx
Cc: linux-tip-commits, linux-next
On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
<dhowells@redhat.com> wrote:
> Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e
> Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> Author: David Howells <dhowells@redhat.com>
> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> Committer: H. Peter Anvin <hpa@zytor.com>
> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>
> bitops: Optimise get_order()
This is causing build failures on non-x86 in linux next according to git bisect.
Here is one example:
http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/
Paul.
--
>
> Optimise get_order() to use bit scanning instructions if such exist rather than
> a loop. Also, make it possible to use get_order() in static initialisations
> too by building it on top of ilog2() in the constant parameter case.
>
> This has been tested for i386 and x86_64 using the following userspace program,
> and for FRV by making appropriate substitutions for fls() and fls64(). It will
> abort if the case for get_order() deviates from the original except for the
> order of 0, for which get_order() produces an undefined result. This program
> tests both dynamic and static parameters.
>
> #include <stdlib.h>
> #include <stdio.h>
>
> #ifdef __x86_64__
> #define BITS_PER_LONG 64
> #else
> #define BITS_PER_LONG 32
> #endif
>
> #define PAGE_SHIFT 12
>
> typedef unsigned long long __u64, u64;
> typedef unsigned int __u32, u32;
> #define noinline __attribute__((noinline))
>
> static inline int fls(int x)
> {
> int bitpos = -1;
>
> asm("bsrl %1,%0"
> : "+r" (bitpos)
> : "rm" (x));
> return bitpos + 1;
> }
>
> static __always_inline int fls64(__u64 x)
> {
> #if BITS_PER_LONG == 64
> long bitpos = -1;
>
> asm("bsrq %1,%0"
> : "+r" (bitpos)
> : "rm" (x));
> return bitpos + 1;
> #else
> __u32 h = x >> 32, l = x;
> int bitpos = -1;
>
> asm("bsrl %1,%0 \n"
> "subl %2,%0 \n"
> "bsrl %3,%0 \n"
> : "+r" (bitpos)
> : "rm" (l), "i"(32), "rm" (h));
>
> return bitpos + 33;
> #endif
> }
>
> static inline __attribute__((const))
> int __ilog2_u32(u32 n)
> {
> return fls(n) - 1;
> }
>
> static inline __attribute__((const))
> int __ilog2_u64(u64 n)
> {
> return fls64(n) - 1;
> }
>
> extern __attribute__((const, noreturn))
> int ____ilog2_NaN(void);
>
> #define ilog2(n) \
> ( \
> __builtin_constant_p(n) ? ( \
> (n) < 1 ? ____ilog2_NaN() : \
> (n) & (1ULL << 63) ? 63 : \
> (n) & (1ULL << 62) ? 62 : \
> (n) & (1ULL << 61) ? 61 : \
> (n) & (1ULL << 60) ? 60 : \
> (n) & (1ULL << 59) ? 59 : \
> (n) & (1ULL << 58) ? 58 : \
> (n) & (1ULL << 57) ? 57 : \
> (n) & (1ULL << 56) ? 56 : \
> (n) & (1ULL << 55) ? 55 : \
> (n) & (1ULL << 54) ? 54 : \
> (n) & (1ULL << 53) ? 53 : \
> (n) & (1ULL << 52) ? 52 : \
> (n) & (1ULL << 51) ? 51 : \
> (n) & (1ULL << 50) ? 50 : \
> (n) & (1ULL << 49) ? 49 : \
> (n) & (1ULL << 48) ? 48 : \
> (n) & (1ULL << 47) ? 47 : \
> (n) & (1ULL << 46) ? 46 : \
> (n) & (1ULL << 45) ? 45 : \
> (n) & (1ULL << 44) ? 44 : \
> (n) & (1ULL << 43) ? 43 : \
> (n) & (1ULL << 42) ? 42 : \
> (n) & (1ULL << 41) ? 41 : \
> (n) & (1ULL << 40) ? 40 : \
> (n) & (1ULL << 39) ? 39 : \
> (n) & (1ULL << 38) ? 38 : \
> (n) & (1ULL << 37) ? 37 : \
> (n) & (1ULL << 36) ? 36 : \
> (n) & (1ULL << 35) ? 35 : \
> (n) & (1ULL << 34) ? 34 : \
> (n) & (1ULL << 33) ? 33 : \
> (n) & (1ULL << 32) ? 32 : \
> (n) & (1ULL << 31) ? 31 : \
> (n) & (1ULL << 30) ? 30 : \
> (n) & (1ULL << 29) ? 29 : \
> (n) & (1ULL << 28) ? 28 : \
> (n) & (1ULL << 27) ? 27 : \
> (n) & (1ULL << 26) ? 26 : \
> (n) & (1ULL << 25) ? 25 : \
> (n) & (1ULL << 24) ? 24 : \
> (n) & (1ULL << 23) ? 23 : \
> (n) & (1ULL << 22) ? 22 : \
> (n) & (1ULL << 21) ? 21 : \
> (n) & (1ULL << 20) ? 20 : \
> (n) & (1ULL << 19) ? 19 : \
> (n) & (1ULL << 18) ? 18 : \
> (n) & (1ULL << 17) ? 17 : \
> (n) & (1ULL << 16) ? 16 : \
> (n) & (1ULL << 15) ? 15 : \
> (n) & (1ULL << 14) ? 14 : \
> (n) & (1ULL << 13) ? 13 : \
> (n) & (1ULL << 12) ? 12 : \
> (n) & (1ULL << 11) ? 11 : \
> (n) & (1ULL << 10) ? 10 : \
> (n) & (1ULL << 9) ? 9 : \
> (n) & (1ULL << 8) ? 8 : \
> (n) & (1ULL << 7) ? 7 : \
> (n) & (1ULL << 6) ? 6 : \
> (n) & (1ULL << 5) ? 5 : \
> (n) & (1ULL << 4) ? 4 : \
> (n) & (1ULL << 3) ? 3 : \
> (n) & (1ULL << 2) ? 2 : \
> (n) & (1ULL << 1) ? 1 : \
> (n) & (1ULL << 0) ? 0 : \
> ____ilog2_NaN() \
> ) : \
> (sizeof(n) <= 4) ? \
> __ilog2_u32(n) : \
> __ilog2_u64(n) \
> )
>
> static noinline __attribute__((const))
> int old_get_order(unsigned long size)
> {
> int order;
>
> size = (size - 1) >> (PAGE_SHIFT - 1);
> order = -1;
> do {
> size >>= 1;
> order++;
> } while (size);
> return order;
> }
>
> static noinline __attribute__((const))
> int __get_order(unsigned long size)
> {
> int order;
> size--;
> size >>= PAGE_SHIFT;
> #if BITS_PER_LONG == 32
> order = fls(size);
> #else
> order = fls64(size);
> #endif
> return order;
> }
>
> #define get_order(n) \
> ( \
> __builtin_constant_p(n) ? ( \
> (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
> ((n < (1UL << PAGE_SHIFT)) ? 0 : \
> ilog2((n) - 1) - PAGE_SHIFT + 1) \
> ) : \
> __get_order(n) \
> )
>
> #define order(N) \
> { (1UL << N) - 1, get_order((1UL << N) - 1) }, \
> { (1UL << N), get_order((1UL << N)) }, \
> { (1UL << N) + 1, get_order((1UL << N) + 1) }
>
> struct order {
> unsigned long n, order;
> };
>
> static const struct order order_table[] = {
> order(0),
> order(1),
> order(2),
> order(3),
> order(4),
> order(5),
> order(6),
> order(7),
> order(8),
> order(9),
> order(10),
> order(11),
> order(12),
> order(13),
> order(14),
> order(15),
> order(16),
> order(17),
> order(18),
> order(19),
> order(20),
> order(21),
> order(22),
> order(23),
> order(24),
> order(25),
> order(26),
> order(27),
> order(28),
> order(29),
> order(30),
> order(31),
> #if BITS_PER_LONG == 64
> order(32),
> order(33),
> order(34),
> order(35),
> #endif
> { 0x2929 }
> };
>
> void check(int loop, unsigned long n)
> {
> unsigned long old, new;
>
> printf("[%2d]: %09lx | ", loop, n);
>
> old = old_get_order(n);
> new = get_order(n);
>
> printf("%3ld, %3ld\n", old, new);
> if (n != 0 && old != new)
> abort();
> }
>
> int main(int argc, char **argv)
> {
> const struct order *p;
> unsigned long n;
> int loop;
>
> for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
> n = 1UL << loop;
> check(loop, n - 1);
> check(loop, n);
> check(loop, n + 1);
> }
>
> for (p = order_table; p->n != 0x2929; p++) {
> unsigned long old, new;
>
> old = old_get_order(p->n);
> new = p->order;
> printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
> if (p->n != 0 && old != new)
> abort();
> }
>
> return 0;
> }
>
> Disassembling the x86_64 version of the above code shows:
>
> 0000000000400510 <old_get_order>:
> 400510: 48 83 ef 01 sub $0x1,%rdi
> 400514: b8 ff ff ff ff mov $0xffffffff,%eax
> 400519: 48 c1 ef 0b shr $0xb,%rdi
> 40051d: 0f 1f 00 nopl (%rax)
> 400520: 83 c0 01 add $0x1,%eax
> 400523: 48 d1 ef shr %rdi
> 400526: 75 f8 jne 400520 <old_get_order+0x10>
> 400528: f3 c3 repz retq
> 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
>
> 0000000000400530 <__get_order>:
> 400530: 48 83 ef 01 sub $0x1,%rdi
> 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
> 40053b: 48 c1 ef 0c shr $0xc,%rdi
> 40053f: 48 0f bd c7 bsr %rdi,%rax
> 400543: 83 c0 01 add $0x1,%eax
> 400546: c3 retq
> 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
> 40054e: 00 00
>
> As can be seen, the new __get_order() function is simpler than the
> old_get_order() function.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
> ---
> include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------
> 1 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
> index 76e9687..e0fb4bf 100644
> --- a/include/asm-generic/getorder.h
> +++ b/include/asm-generic/getorder.h
> @@ -4,6 +4,25 @@
> #ifndef __ASSEMBLY__
>
> #include <linux/compiler.h>
> +#include <linux/log2.h>
> +
> +/*
> + * Runtime evaluation of get_order()
> + */
> +static inline __attribute_const__
> +int __get_order(unsigned long size)
> +{
> + int order;
> +
> + size--;
> + size >>= PAGE_SHIFT;
> +#if BITS_PER_LONG == 32
> + order = fls(size);
> +#else
> + order = fls64(size);
> +#endif
> + return order;
> +}
>
> /**
> * get_order - Determine the allocation order of a memory size
> @@ -27,18 +46,15 @@
> * This function may be used to initialise variables with compile time
> * evaluations of constants.
> */
> -static inline __attribute_const__ int get_order(unsigned long size)
> -{
> - int order;
> -
> - size = (size - 1) >> (PAGE_SHIFT - 1);
> - order = -1;
> - do {
> - size >>= 1;
> - order++;
> - } while (size);
> - return order;
> -}
> +#define get_order(n) \
> +( \
> + __builtin_constant_p(n) ? ( \
> + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
> + ((n < (1UL << PAGE_SHIFT)) ? 0 : \
> + ilog2((n) - 1) - PAGE_SHIFT + 1) \
> + ) : \
> + __get_order(n) \
> +)
>
> #endif /* __ASSEMBLY__ */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order()
@ 2012-02-29 20:29 ` Paul Gortmaker
0 siblings, 0 replies; 9+ messages in thread
From: Paul Gortmaker @ 2012-02-29 20:29 UTC (permalink / raw)
To: mingo, hpa, linux-kernel, arnd, dhowells, tglx
Cc: linux-tip-commits, linux-next
On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
<dhowells@redhat.com> wrote:
> Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e
> Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> Author: David Howells <dhowells@redhat.com>
> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> Committer: H. Peter Anvin <hpa@zytor.com>
> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>
> bitops: Optimise get_order()
This is causing build failures on non-x86 in linux next according to git bisect.
Here is one example:
http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/
Paul.
--
>
> Optimise get_order() to use bit scanning instructions if such exist rather than
> a loop. Also, make it possible to use get_order() in static initialisations
> too by building it on top of ilog2() in the constant parameter case.
>
> This has been tested for i386 and x86_64 using the following userspace program,
> and for FRV by making appropriate substitutions for fls() and fls64(). It will
> abort if the case for get_order() deviates from the original except for the
> order of 0, for which get_order() produces an undefined result. This program
> tests both dynamic and static parameters.
>
> #include <stdlib.h>
> #include <stdio.h>
>
> #ifdef __x86_64__
> #define BITS_PER_LONG 64
> #else
> #define BITS_PER_LONG 32
> #endif
>
> #define PAGE_SHIFT 12
>
> typedef unsigned long long __u64, u64;
> typedef unsigned int __u32, u32;
> #define noinline __attribute__((noinline))
>
> static inline int fls(int x)
> {
> int bitpos = -1;
>
> asm("bsrl %1,%0"
> : "+r" (bitpos)
> : "rm" (x));
> return bitpos + 1;
> }
>
> static __always_inline int fls64(__u64 x)
> {
> #if BITS_PER_LONG == 64
> long bitpos = -1;
>
> asm("bsrq %1,%0"
> : "+r" (bitpos)
> : "rm" (x));
> return bitpos + 1;
> #else
> __u32 h = x >> 32, l = x;
> int bitpos = -1;
>
> asm("bsrl %1,%0 \n"
> "subl %2,%0 \n"
> "bsrl %3,%0 \n"
> : "+r" (bitpos)
> : "rm" (l), "i"(32), "rm" (h));
>
> return bitpos + 33;
> #endif
> }
>
> static inline __attribute__((const))
> int __ilog2_u32(u32 n)
> {
> return fls(n) - 1;
> }
>
> static inline __attribute__((const))
> int __ilog2_u64(u64 n)
> {
> return fls64(n) - 1;
> }
>
> extern __attribute__((const, noreturn))
> int ____ilog2_NaN(void);
>
> #define ilog2(n) \
> ( \
> __builtin_constant_p(n) ? ( \
> (n) < 1 ? ____ilog2_NaN() : \
> (n) & (1ULL << 63) ? 63 : \
> (n) & (1ULL << 62) ? 62 : \
> (n) & (1ULL << 61) ? 61 : \
> (n) & (1ULL << 60) ? 60 : \
> (n) & (1ULL << 59) ? 59 : \
> (n) & (1ULL << 58) ? 58 : \
> (n) & (1ULL << 57) ? 57 : \
> (n) & (1ULL << 56) ? 56 : \
> (n) & (1ULL << 55) ? 55 : \
> (n) & (1ULL << 54) ? 54 : \
> (n) & (1ULL << 53) ? 53 : \
> (n) & (1ULL << 52) ? 52 : \
> (n) & (1ULL << 51) ? 51 : \
> (n) & (1ULL << 50) ? 50 : \
> (n) & (1ULL << 49) ? 49 : \
> (n) & (1ULL << 48) ? 48 : \
> (n) & (1ULL << 47) ? 47 : \
> (n) & (1ULL << 46) ? 46 : \
> (n) & (1ULL << 45) ? 45 : \
> (n) & (1ULL << 44) ? 44 : \
> (n) & (1ULL << 43) ? 43 : \
> (n) & (1ULL << 42) ? 42 : \
> (n) & (1ULL << 41) ? 41 : \
> (n) & (1ULL << 40) ? 40 : \
> (n) & (1ULL << 39) ? 39 : \
> (n) & (1ULL << 38) ? 38 : \
> (n) & (1ULL << 37) ? 37 : \
> (n) & (1ULL << 36) ? 36 : \
> (n) & (1ULL << 35) ? 35 : \
> (n) & (1ULL << 34) ? 34 : \
> (n) & (1ULL << 33) ? 33 : \
> (n) & (1ULL << 32) ? 32 : \
> (n) & (1ULL << 31) ? 31 : \
> (n) & (1ULL << 30) ? 30 : \
> (n) & (1ULL << 29) ? 29 : \
> (n) & (1ULL << 28) ? 28 : \
> (n) & (1ULL << 27) ? 27 : \
> (n) & (1ULL << 26) ? 26 : \
> (n) & (1ULL << 25) ? 25 : \
> (n) & (1ULL << 24) ? 24 : \
> (n) & (1ULL << 23) ? 23 : \
> (n) & (1ULL << 22) ? 22 : \
> (n) & (1ULL << 21) ? 21 : \
> (n) & (1ULL << 20) ? 20 : \
> (n) & (1ULL << 19) ? 19 : \
> (n) & (1ULL << 18) ? 18 : \
> (n) & (1ULL << 17) ? 17 : \
> (n) & (1ULL << 16) ? 16 : \
> (n) & (1ULL << 15) ? 15 : \
> (n) & (1ULL << 14) ? 14 : \
> (n) & (1ULL << 13) ? 13 : \
> (n) & (1ULL << 12) ? 12 : \
> (n) & (1ULL << 11) ? 11 : \
> (n) & (1ULL << 10) ? 10 : \
> (n) & (1ULL << 9) ? 9 : \
> (n) & (1ULL << 8) ? 8 : \
> (n) & (1ULL << 7) ? 7 : \
> (n) & (1ULL << 6) ? 6 : \
> (n) & (1ULL << 5) ? 5 : \
> (n) & (1ULL << 4) ? 4 : \
> (n) & (1ULL << 3) ? 3 : \
> (n) & (1ULL << 2) ? 2 : \
> (n) & (1ULL << 1) ? 1 : \
> (n) & (1ULL << 0) ? 0 : \
> ____ilog2_NaN() \
> ) : \
> (sizeof(n) <= 4) ? \
> __ilog2_u32(n) : \
> __ilog2_u64(n) \
> )
>
> static noinline __attribute__((const))
> int old_get_order(unsigned long size)
> {
> int order;
>
> size = (size - 1) >> (PAGE_SHIFT - 1);
> order = -1;
> do {
> size >>= 1;
> order++;
> } while (size);
> return order;
> }
>
> static noinline __attribute__((const))
> int __get_order(unsigned long size)
> {
> int order;
> size--;
> size >>= PAGE_SHIFT;
> #if BITS_PER_LONG == 32
> order = fls(size);
> #else
> order = fls64(size);
> #endif
> return order;
> }
>
> #define get_order(n) \
> ( \
> __builtin_constant_p(n) ? ( \
> (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
> ((n < (1UL << PAGE_SHIFT)) ? 0 : \
> ilog2((n) - 1) - PAGE_SHIFT + 1) \
> ) : \
> __get_order(n) \
> )
>
> #define order(N) \
> { (1UL << N) - 1, get_order((1UL << N) - 1) }, \
> { (1UL << N), get_order((1UL << N)) }, \
> { (1UL << N) + 1, get_order((1UL << N) + 1) }
>
> struct order {
> unsigned long n, order;
> };
>
> static const struct order order_table[] = {
> order(0),
> order(1),
> order(2),
> order(3),
> order(4),
> order(5),
> order(6),
> order(7),
> order(8),
> order(9),
> order(10),
> order(11),
> order(12),
> order(13),
> order(14),
> order(15),
> order(16),
> order(17),
> order(18),
> order(19),
> order(20),
> order(21),
> order(22),
> order(23),
> order(24),
> order(25),
> order(26),
> order(27),
> order(28),
> order(29),
> order(30),
> order(31),
> #if BITS_PER_LONG == 64
> order(32),
> order(33),
> order(34),
> order(35),
> #endif
> { 0x2929 }
> };
>
> void check(int loop, unsigned long n)
> {
> unsigned long old, new;
>
> printf("[%2d]: %09lx | ", loop, n);
>
> old = old_get_order(n);
> new = get_order(n);
>
> printf("%3ld, %3ld\n", old, new);
> if (n != 0 && old != new)
> abort();
> }
>
> int main(int argc, char **argv)
> {
> const struct order *p;
> unsigned long n;
> int loop;
>
> for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
> n = 1UL << loop;
> check(loop, n - 1);
> check(loop, n);
> check(loop, n + 1);
> }
>
> for (p = order_table; p->n != 0x2929; p++) {
> unsigned long old, new;
>
> old = old_get_order(p->n);
> new = p->order;
> printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
> if (p->n != 0 && old != new)
> abort();
> }
>
> return 0;
> }
>
> Disassembling the x86_64 version of the above code shows:
>
> 0000000000400510 <old_get_order>:
> 400510: 48 83 ef 01 sub $0x1,%rdi
> 400514: b8 ff ff ff ff mov $0xffffffff,%eax
> 400519: 48 c1 ef 0b shr $0xb,%rdi
> 40051d: 0f 1f 00 nopl (%rax)
> 400520: 83 c0 01 add $0x1,%eax
> 400523: 48 d1 ef shr %rdi
> 400526: 75 f8 jne 400520 <old_get_order+0x10>
> 400528: f3 c3 repz retq
> 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
>
> 0000000000400530 <__get_order>:
> 400530: 48 83 ef 01 sub $0x1,%rdi
> 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
> 40053b: 48 c1 ef 0c shr $0xc,%rdi
> 40053f: 48 0f bd c7 bsr %rdi,%rax
> 400543: 83 c0 01 add $0x1,%eax
> 400546: c3 retq
> 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
> 40054e: 00 00
>
> As can be seen, the new __get_order() function is simpler than the
> old_get_order() function.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
> ---
> include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------
> 1 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
> index 76e9687..e0fb4bf 100644
> --- a/include/asm-generic/getorder.h
> +++ b/include/asm-generic/getorder.h
> @@ -4,6 +4,25 @@
> #ifndef __ASSEMBLY__
>
> #include <linux/compiler.h>
> +#include <linux/log2.h>
> +
> +/*
> + * Runtime evaluation of get_order()
> + */
> +static inline __attribute_const__
> +int __get_order(unsigned long size)
> +{
> + int order;
> +
> + size--;
> + size >>= PAGE_SHIFT;
> +#if BITS_PER_LONG == 32
> + order = fls(size);
> +#else
> + order = fls64(size);
> +#endif
> + return order;
> +}
>
> /**
> * get_order - Determine the allocation order of a memory size
> @@ -27,18 +46,15 @@
> * This function may be used to initialise variables with compile time
> * evaluations of constants.
> */
> -static inline __attribute_const__ int get_order(unsigned long size)
> -{
> - int order;
> -
> - size = (size - 1) >> (PAGE_SHIFT - 1);
> - order = -1;
> - do {
> - size >>= 1;
> - order++;
> - } while (size);
> - return order;
> -}
> +#define get_order(n) \
> +( \
> + __builtin_constant_p(n) ? ( \
> + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
> + ((n < (1UL << PAGE_SHIFT)) ? 0 : \
> + ilog2((n) - 1) - PAGE_SHIFT + 1) \
> + ) : \
> + __get_order(n) \
> +)
>
> #endif /* __ASSEMBLY__ */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
ÿôèº{.nÇ+·®+%Ëÿ±éݶ\x17¥wÿº{.nÇ+·¥{±þG«éÿ{ayº\x1dÊÚë,j\a¢f£¢·hïêÿêçz_è®\x03(éÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?¨èÚ&£ø§~á¶iOæ¬z·vØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?I¥
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order()
2012-02-29 20:29 ` Paul Gortmaker
(?)
@ 2012-03-01 0:31 ` Stephen Rothwell
2012-03-01 21:02 ` H. Peter Anvin
2012-03-06 14:24 ` David Howells
-1 siblings, 2 replies; 9+ messages in thread
From: Stephen Rothwell @ 2012-03-01 0:31 UTC (permalink / raw)
To: Paul Gortmaker
Cc: mingo, hpa, linux-kernel, arnd, dhowells, tglx, linux-tip-commits,
linux-next
[-- Attachment #1: Type: text/plain, Size: 1199 bytes --]
On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
>
> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
> <dhowells@redhat.com> wrote:
> > Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e
> > Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> > Author: David Howells <dhowells@redhat.com>
> > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> > Committer: H. Peter Anvin <hpa@zytor.com>
> > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
> >
> > bitops: Optimise get_order()
>
> This is causing build failures on non-x86 in linux next according to git bisect.
Presumably it needs to include linux/bitops.h (and see below).
> > +static inline __attribute_const__
> > +int __get_order(unsigned long size)
> > +{
> > + int order;
> > +
> > + size--;
> > + size >>= PAGE_SHIFT;
> > +#if BITS_PER_LONG == 32
> > + order = fls(size);
> > +#else
> > + order = fls64(size);
> > +#endif
linux/bitops.h has fls_long() that does this size test and calls the right thing.
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order()
2012-03-01 0:31 ` Stephen Rothwell
@ 2012-03-01 21:02 ` H. Peter Anvin
2012-03-06 14:24 ` David Howells
1 sibling, 0 replies; 9+ messages in thread
From: H. Peter Anvin @ 2012-03-01 21:02 UTC (permalink / raw)
To: Stephen Rothwell
Cc: Paul Gortmaker, mingo, linux-kernel, arnd, dhowells, tglx,
linux-tip-commits, linux-next
David, could you make a fix for this?
-hpa
On 02/29/2012 04:31 PM, Stephen Rothwell wrote:
> On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker<paul.gortmaker@windriver.com> wrote:
>>
>> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
>> <dhowells@redhat.com> wrote:
>>> Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e
>>> Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
>>> Author: David Howells<dhowells@redhat.com>
>>> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
>>> Committer: H. Peter Anvin<hpa@zytor.com>
>>> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>>>
>>> bitops: Optimise get_order()
>>
>> This is causing build failures on non-x86 in linux next according to git bisect.
>
> Presumably it needs to include linux/bitops.h (and see below).
>
>>> +static inline __attribute_const__
>>> +int __get_order(unsigned long size)
>>> +{
>>> + int order;
>>> +
>>> + size--;
>>> + size>>= PAGE_SHIFT;
>>> +#if BITS_PER_LONG == 32
>>> + order = fls(size);
>>> +#else
>>> + order = fls64(size);
>>> +#endif
>
> linux/bitops.h has fls_long() that does this size test and calls the right thing.
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order()
2012-03-01 0:31 ` Stephen Rothwell
2012-03-01 21:02 ` H. Peter Anvin
@ 2012-03-06 14:24 ` David Howells
1 sibling, 0 replies; 9+ messages in thread
From: David Howells @ 2012-03-06 14:24 UTC (permalink / raw)
To: Stephen Rothwell
Cc: dhowells, Paul Gortmaker, mingo, hpa, linux-kernel, arnd, tglx,
linux-tip-commits, linux-next
Stephen Rothwell <sfr@canb.auug.org.au> wrote:
> Presumably it needs to include linux/bitops.h (and see below).
That won't help. It already includes that via linux/log2.h.
The error chain looks suspiciously like a circular dependency has occurred.
David
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2012-03-06 14:25 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells
2012-02-29 20:29 ` Paul Gortmaker
2012-02-29 20:29 ` Paul Gortmaker
2012-03-01 0:31 ` Stephen Rothwell
2012-03-01 21:02 ` H. Peter Anvin
2012-03-06 14:24 ` David Howells
2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.