* [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case
@ 2012-02-20 22:39 David Howells
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
0 siblings, 2 replies; 8+ messages in thread
From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw)
To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann
Adjust the comment on get_order() to note that the result of passing a size of
0 results in an undefined value.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
include/asm-generic/getorder.h | 23 ++++++++++++++++++++++-
1 files changed, 22 insertions(+), 1 deletions(-)
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 67e7245..76e9687 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -5,7 +5,28 @@
#include <linux/compiler.h>
-/* Pure 2^n version of get_order */
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory. This
+ * is on a logarithmic scale, where:
+ *
+ * 0 -> 2^0 * PAGE_SIZE and below
+ * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ * ...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ *
+ * This function may be used to initialise variables with compile time
+ * evaluations of constants.
+ */
static inline __attribute_const__ int get_order(unsigned long size)
{
int order;
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] Optimise get_order()
2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
@ 2012-02-20 22:39 ` David Howells
2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells
2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
1 sibling, 1 reply; 8+ messages in thread
From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw)
To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann
Optimise get_order() to use bit scanning instructions if such exist rather than
a loop. Also, make it possible to use get_order() in static initialisations
too by building it on top of ilog2() in the constant parameter case.
This has been tested for i386 and x86_64 using the following userspace program,
and for FRV by making appropriate substitutions for fls() and fls64(). It will
abort if the case for get_order() deviates from the original except for the
order of 0, for which get_order() produces an undefined result. This program
tests both dynamic and static parameters.
#include <stdlib.h>
#include <stdio.h>
#ifdef __x86_64__
#define BITS_PER_LONG 64
#else
#define BITS_PER_LONG 32
#endif
#define PAGE_SHIFT 12
typedef unsigned long long __u64, u64;
typedef unsigned int __u32, u32;
#define noinline __attribute__((noinline))
static inline int fls(int x)
{
int bitpos = -1;
asm("bsrl %1,%0"
: "+r" (bitpos)
: "rm" (x));
return bitpos + 1;
}
static __always_inline int fls64(__u64 x)
{
#if BITS_PER_LONG == 64
long bitpos = -1;
asm("bsrq %1,%0"
: "+r" (bitpos)
: "rm" (x));
return bitpos + 1;
#else
__u32 h = x >> 32, l = x;
int bitpos = -1;
asm("bsrl %1,%0 \n"
"subl %2,%0 \n"
"bsrl %3,%0 \n"
: "+r" (bitpos)
: "rm" (l), "i"(32), "rm" (h));
return bitpos + 33;
#endif
}
static inline __attribute__((const))
int __ilog2_u32(u32 n)
{
return fls(n) - 1;
}
static inline __attribute__((const))
int __ilog2_u64(u64 n)
{
return fls64(n) - 1;
}
extern __attribute__((const, noreturn))
int ____ilog2_NaN(void);
#define ilog2(n) \
( \
__builtin_constant_p(n) ? ( \
(n) < 1 ? ____ilog2_NaN() : \
(n) & (1ULL << 63) ? 63 : \
(n) & (1ULL << 62) ? 62 : \
(n) & (1ULL << 61) ? 61 : \
(n) & (1ULL << 60) ? 60 : \
(n) & (1ULL << 59) ? 59 : \
(n) & (1ULL << 58) ? 58 : \
(n) & (1ULL << 57) ? 57 : \
(n) & (1ULL << 56) ? 56 : \
(n) & (1ULL << 55) ? 55 : \
(n) & (1ULL << 54) ? 54 : \
(n) & (1ULL << 53) ? 53 : \
(n) & (1ULL << 52) ? 52 : \
(n) & (1ULL << 51) ? 51 : \
(n) & (1ULL << 50) ? 50 : \
(n) & (1ULL << 49) ? 49 : \
(n) & (1ULL << 48) ? 48 : \
(n) & (1ULL << 47) ? 47 : \
(n) & (1ULL << 46) ? 46 : \
(n) & (1ULL << 45) ? 45 : \
(n) & (1ULL << 44) ? 44 : \
(n) & (1ULL << 43) ? 43 : \
(n) & (1ULL << 42) ? 42 : \
(n) & (1ULL << 41) ? 41 : \
(n) & (1ULL << 40) ? 40 : \
(n) & (1ULL << 39) ? 39 : \
(n) & (1ULL << 38) ? 38 : \
(n) & (1ULL << 37) ? 37 : \
(n) & (1ULL << 36) ? 36 : \
(n) & (1ULL << 35) ? 35 : \
(n) & (1ULL << 34) ? 34 : \
(n) & (1ULL << 33) ? 33 : \
(n) & (1ULL << 32) ? 32 : \
(n) & (1ULL << 31) ? 31 : \
(n) & (1ULL << 30) ? 30 : \
(n) & (1ULL << 29) ? 29 : \
(n) & (1ULL << 28) ? 28 : \
(n) & (1ULL << 27) ? 27 : \
(n) & (1ULL << 26) ? 26 : \
(n) & (1ULL << 25) ? 25 : \
(n) & (1ULL << 24) ? 24 : \
(n) & (1ULL << 23) ? 23 : \
(n) & (1ULL << 22) ? 22 : \
(n) & (1ULL << 21) ? 21 : \
(n) & (1ULL << 20) ? 20 : \
(n) & (1ULL << 19) ? 19 : \
(n) & (1ULL << 18) ? 18 : \
(n) & (1ULL << 17) ? 17 : \
(n) & (1ULL << 16) ? 16 : \
(n) & (1ULL << 15) ? 15 : \
(n) & (1ULL << 14) ? 14 : \
(n) & (1ULL << 13) ? 13 : \
(n) & (1ULL << 12) ? 12 : \
(n) & (1ULL << 11) ? 11 : \
(n) & (1ULL << 10) ? 10 : \
(n) & (1ULL << 9) ? 9 : \
(n) & (1ULL << 8) ? 8 : \
(n) & (1ULL << 7) ? 7 : \
(n) & (1ULL << 6) ? 6 : \
(n) & (1ULL << 5) ? 5 : \
(n) & (1ULL << 4) ? 4 : \
(n) & (1ULL << 3) ? 3 : \
(n) & (1ULL << 2) ? 2 : \
(n) & (1ULL << 1) ? 1 : \
(n) & (1ULL << 0) ? 0 : \
____ilog2_NaN() \
) : \
(sizeof(n) <= 4) ? \
__ilog2_u32(n) : \
__ilog2_u64(n) \
)
static noinline __attribute__((const))
int old_get_order(unsigned long size)
{
int order;
size = (size - 1) >> (PAGE_SHIFT - 1);
order = -1;
do {
size >>= 1;
order++;
} while (size);
return order;
}
static noinline __attribute__((const))
int __get_order(unsigned long size)
{
int order;
size--;
size >>= PAGE_SHIFT;
#if BITS_PER_LONG == 32
order = fls(size);
#else
order = fls64(size);
#endif
return order;
}
#define get_order(n) \
( \
__builtin_constant_p(n) ? ( \
(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
((n < (1UL << PAGE_SHIFT)) ? 0 : \
ilog2((n) - 1) - PAGE_SHIFT + 1) \
) : \
__get_order(n) \
)
#define order(N) \
{ (1UL << N) - 1, get_order((1UL << N) - 1) }, \
{ (1UL << N), get_order((1UL << N)) }, \
{ (1UL << N) + 1, get_order((1UL << N) + 1) }
struct order {
unsigned long n, order;
};
static const struct order order_table[] = {
order(0),
order(1),
order(2),
order(3),
order(4),
order(5),
order(6),
order(7),
order(8),
order(9),
order(10),
order(11),
order(12),
order(13),
order(14),
order(15),
order(16),
order(17),
order(18),
order(19),
order(20),
order(21),
order(22),
order(23),
order(24),
order(25),
order(26),
order(27),
order(28),
order(29),
order(30),
order(31),
#if BITS_PER_LONG == 64
order(32),
order(33),
order(34),
order(35),
#endif
{ 0x2929 }
};
void check(int loop, unsigned long n)
{
unsigned long old, new;
printf("[%2d]: %09lx | ", loop, n);
old = old_get_order(n);
new = get_order(n);
printf("%3ld, %3ld\n", old, new);
if (n != 0 && old != new)
abort();
}
int main(int argc, char **argv)
{
const struct order *p;
unsigned long n;
int loop;
for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
n = 1UL << loop;
check(loop, n - 1);
check(loop, n);
check(loop, n + 1);
}
for (p = order_table; p->n != 0x2929; p++) {
unsigned long old, new;
old = old_get_order(p->n);
new = p->order;
printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
if (p->n != 0 && old != new)
abort();
}
return 0;
}
Disassembling the x86_64 version of the above code shows:
0000000000400510 <old_get_order>:
400510: 48 83 ef 01 sub $0x1,%rdi
400514: b8 ff ff ff ff mov $0xffffffff,%eax
400519: 48 c1 ef 0b shr $0xb,%rdi
40051d: 0f 1f 00 nopl (%rax)
400520: 83 c0 01 add $0x1,%eax
400523: 48 d1 ef shr %rdi
400526: 75 f8 jne 400520 <old_get_order+0x10>
400528: f3 c3 repz retq
40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0000000000400530 <__get_order>:
400530: 48 83 ef 01 sub $0x1,%rdi
400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
40053b: 48 c1 ef 0c shr $0xc,%rdi
40053f: 48 0f bd c7 bsr %rdi,%rax
400543: 83 c0 01 add $0x1,%eax
400546: c3 retq
400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
40054e: 00 00
As can be seen, the new __get_order() function is simpler than the
old_get_order() function.
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------
1 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 76e9687..e0fb4bf 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -4,6 +4,25 @@
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
+#include <linux/log2.h>
+
+/*
+ * Runtime evaluation of get_order()
+ */
+static inline __attribute_const__
+int __get_order(unsigned long size)
+{
+ int order;
+
+ size--;
+ size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+ order = fls(size);
+#else
+ order = fls64(size);
+#endif
+ return order;
+}
/**
* get_order - Determine the allocation order of a memory size
@@ -27,18 +46,15 @@
* This function may be used to initialise variables with compile time
* evaluations of constants.
*/
-static inline __attribute_const__ int get_order(unsigned long size)
-{
- int order;
-
- size = (size - 1) >> (PAGE_SHIFT - 1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
-}
+#define get_order(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
+ ((n < (1UL << PAGE_SHIFT)) ? 0 : \
+ ilog2((n) - 1) - PAGE_SHIFT + 1) \
+ ) : \
+ __get_order(n) \
+)
#endif /* __ASSEMBLY__ */
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case
2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
@ 2012-02-20 23:19 ` tip-bot for David Howells
1 sibling, 0 replies; 8+ messages in thread
From: tip-bot for David Howells @ 2012-02-20 23:19 UTC (permalink / raw)
To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx
Commit-ID: e0891a9816316b5e05fd5b0453ffe9fd6a56f489
Gitweb: http://git.kernel.org/tip/e0891a9816316b5e05fd5b0453ffe9fd6a56f489
Author: David Howells <dhowells@redhat.com>
AuthorDate: Mon, 20 Feb 2012 22:39:18 +0000
Committer: H. Peter Anvin <hpa@zytor.com>
CommitDate: Mon, 20 Feb 2012 14:46:55 -0800
bitops: Adjust the comment on get_order() to describe the size==0 case
Adjust the comment on get_order() to note that the result of passing a size of
0 results in an undefined value.
Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120220223917.16199.9416.stgit@warthog.procyon.org.uk
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
include/asm-generic/getorder.h | 23 ++++++++++++++++++++++-
1 files changed, 22 insertions(+), 1 deletions(-)
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 67e7245..76e9687 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -5,7 +5,28 @@
#include <linux/compiler.h>
-/* Pure 2^n version of get_order */
+/**
+ * get_order - Determine the allocation order of a memory size
+ * @size: The size for which to get the order
+ *
+ * Determine the allocation order of a particular sized block of memory. This
+ * is on a logarithmic scale, where:
+ *
+ * 0 -> 2^0 * PAGE_SIZE and below
+ * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
+ * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
+ * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
+ * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
+ * ...
+ *
+ * The order returned is used to find the smallest allocation granule required
+ * to hold an object of the specified size.
+ *
+ * The result is undefined if the size is 0.
+ *
+ * This function may be used to initialise variables with compile time
+ * evaluations of constants.
+ */
static inline __attribute_const__ int get_order(unsigned long size)
{
int order;
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [tip:x86/asm] bitops: Optimise get_order()
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
@ 2012-02-20 23:20 ` tip-bot for David Howells
2012-02-29 20:29 ` Paul Gortmaker
0 siblings, 1 reply; 8+ messages in thread
From: tip-bot for David Howells @ 2012-02-20 23:20 UTC (permalink / raw)
To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx
Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e
Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
Author: David Howells <dhowells@redhat.com>
AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
Committer: H. Peter Anvin <hpa@zytor.com>
CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
bitops: Optimise get_order()
Optimise get_order() to use bit scanning instructions if such exist rather than
a loop. Also, make it possible to use get_order() in static initialisations
too by building it on top of ilog2() in the constant parameter case.
This has been tested for i386 and x86_64 using the following userspace program,
and for FRV by making appropriate substitutions for fls() and fls64(). It will
abort if the case for get_order() deviates from the original except for the
order of 0, for which get_order() produces an undefined result. This program
tests both dynamic and static parameters.
#include <stdlib.h>
#include <stdio.h>
#ifdef __x86_64__
#define BITS_PER_LONG 64
#else
#define BITS_PER_LONG 32
#endif
#define PAGE_SHIFT 12
typedef unsigned long long __u64, u64;
typedef unsigned int __u32, u32;
#define noinline __attribute__((noinline))
static inline int fls(int x)
{
int bitpos = -1;
asm("bsrl %1,%0"
: "+r" (bitpos)
: "rm" (x));
return bitpos + 1;
}
static __always_inline int fls64(__u64 x)
{
#if BITS_PER_LONG == 64
long bitpos = -1;
asm("bsrq %1,%0"
: "+r" (bitpos)
: "rm" (x));
return bitpos + 1;
#else
__u32 h = x >> 32, l = x;
int bitpos = -1;
asm("bsrl %1,%0 \n"
"subl %2,%0 \n"
"bsrl %3,%0 \n"
: "+r" (bitpos)
: "rm" (l), "i"(32), "rm" (h));
return bitpos + 33;
#endif
}
static inline __attribute__((const))
int __ilog2_u32(u32 n)
{
return fls(n) - 1;
}
static inline __attribute__((const))
int __ilog2_u64(u64 n)
{
return fls64(n) - 1;
}
extern __attribute__((const, noreturn))
int ____ilog2_NaN(void);
#define ilog2(n) \
( \
__builtin_constant_p(n) ? ( \
(n) < 1 ? ____ilog2_NaN() : \
(n) & (1ULL << 63) ? 63 : \
(n) & (1ULL << 62) ? 62 : \
(n) & (1ULL << 61) ? 61 : \
(n) & (1ULL << 60) ? 60 : \
(n) & (1ULL << 59) ? 59 : \
(n) & (1ULL << 58) ? 58 : \
(n) & (1ULL << 57) ? 57 : \
(n) & (1ULL << 56) ? 56 : \
(n) & (1ULL << 55) ? 55 : \
(n) & (1ULL << 54) ? 54 : \
(n) & (1ULL << 53) ? 53 : \
(n) & (1ULL << 52) ? 52 : \
(n) & (1ULL << 51) ? 51 : \
(n) & (1ULL << 50) ? 50 : \
(n) & (1ULL << 49) ? 49 : \
(n) & (1ULL << 48) ? 48 : \
(n) & (1ULL << 47) ? 47 : \
(n) & (1ULL << 46) ? 46 : \
(n) & (1ULL << 45) ? 45 : \
(n) & (1ULL << 44) ? 44 : \
(n) & (1ULL << 43) ? 43 : \
(n) & (1ULL << 42) ? 42 : \
(n) & (1ULL << 41) ? 41 : \
(n) & (1ULL << 40) ? 40 : \
(n) & (1ULL << 39) ? 39 : \
(n) & (1ULL << 38) ? 38 : \
(n) & (1ULL << 37) ? 37 : \
(n) & (1ULL << 36) ? 36 : \
(n) & (1ULL << 35) ? 35 : \
(n) & (1ULL << 34) ? 34 : \
(n) & (1ULL << 33) ? 33 : \
(n) & (1ULL << 32) ? 32 : \
(n) & (1ULL << 31) ? 31 : \
(n) & (1ULL << 30) ? 30 : \
(n) & (1ULL << 29) ? 29 : \
(n) & (1ULL << 28) ? 28 : \
(n) & (1ULL << 27) ? 27 : \
(n) & (1ULL << 26) ? 26 : \
(n) & (1ULL << 25) ? 25 : \
(n) & (1ULL << 24) ? 24 : \
(n) & (1ULL << 23) ? 23 : \
(n) & (1ULL << 22) ? 22 : \
(n) & (1ULL << 21) ? 21 : \
(n) & (1ULL << 20) ? 20 : \
(n) & (1ULL << 19) ? 19 : \
(n) & (1ULL << 18) ? 18 : \
(n) & (1ULL << 17) ? 17 : \
(n) & (1ULL << 16) ? 16 : \
(n) & (1ULL << 15) ? 15 : \
(n) & (1ULL << 14) ? 14 : \
(n) & (1ULL << 13) ? 13 : \
(n) & (1ULL << 12) ? 12 : \
(n) & (1ULL << 11) ? 11 : \
(n) & (1ULL << 10) ? 10 : \
(n) & (1ULL << 9) ? 9 : \
(n) & (1ULL << 8) ? 8 : \
(n) & (1ULL << 7) ? 7 : \
(n) & (1ULL << 6) ? 6 : \
(n) & (1ULL << 5) ? 5 : \
(n) & (1ULL << 4) ? 4 : \
(n) & (1ULL << 3) ? 3 : \
(n) & (1ULL << 2) ? 2 : \
(n) & (1ULL << 1) ? 1 : \
(n) & (1ULL << 0) ? 0 : \
____ilog2_NaN() \
) : \
(sizeof(n) <= 4) ? \
__ilog2_u32(n) : \
__ilog2_u64(n) \
)
static noinline __attribute__((const))
int old_get_order(unsigned long size)
{
int order;
size = (size - 1) >> (PAGE_SHIFT - 1);
order = -1;
do {
size >>= 1;
order++;
} while (size);
return order;
}
static noinline __attribute__((const))
int __get_order(unsigned long size)
{
int order;
size--;
size >>= PAGE_SHIFT;
#if BITS_PER_LONG == 32
order = fls(size);
#else
order = fls64(size);
#endif
return order;
}
#define get_order(n) \
( \
__builtin_constant_p(n) ? ( \
(n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
((n < (1UL << PAGE_SHIFT)) ? 0 : \
ilog2((n) - 1) - PAGE_SHIFT + 1) \
) : \
__get_order(n) \
)
#define order(N) \
{ (1UL << N) - 1, get_order((1UL << N) - 1) }, \
{ (1UL << N), get_order((1UL << N)) }, \
{ (1UL << N) + 1, get_order((1UL << N) + 1) }
struct order {
unsigned long n, order;
};
static const struct order order_table[] = {
order(0),
order(1),
order(2),
order(3),
order(4),
order(5),
order(6),
order(7),
order(8),
order(9),
order(10),
order(11),
order(12),
order(13),
order(14),
order(15),
order(16),
order(17),
order(18),
order(19),
order(20),
order(21),
order(22),
order(23),
order(24),
order(25),
order(26),
order(27),
order(28),
order(29),
order(30),
order(31),
#if BITS_PER_LONG == 64
order(32),
order(33),
order(34),
order(35),
#endif
{ 0x2929 }
};
void check(int loop, unsigned long n)
{
unsigned long old, new;
printf("[%2d]: %09lx | ", loop, n);
old = old_get_order(n);
new = get_order(n);
printf("%3ld, %3ld\n", old, new);
if (n != 0 && old != new)
abort();
}
int main(int argc, char **argv)
{
const struct order *p;
unsigned long n;
int loop;
for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
n = 1UL << loop;
check(loop, n - 1);
check(loop, n);
check(loop, n + 1);
}
for (p = order_table; p->n != 0x2929; p++) {
unsigned long old, new;
old = old_get_order(p->n);
new = p->order;
printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
if (p->n != 0 && old != new)
abort();
}
return 0;
}
Disassembling the x86_64 version of the above code shows:
0000000000400510 <old_get_order>:
400510: 48 83 ef 01 sub $0x1,%rdi
400514: b8 ff ff ff ff mov $0xffffffff,%eax
400519: 48 c1 ef 0b shr $0xb,%rdi
40051d: 0f 1f 00 nopl (%rax)
400520: 83 c0 01 add $0x1,%eax
400523: 48 d1 ef shr %rdi
400526: 75 f8 jne 400520 <old_get_order+0x10>
400528: f3 c3 repz retq
40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
0000000000400530 <__get_order>:
400530: 48 83 ef 01 sub $0x1,%rdi
400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
40053b: 48 c1 ef 0c shr $0xc,%rdi
40053f: 48 0f bd c7 bsr %rdi,%rax
400543: 83 c0 01 add $0x1,%eax
400546: c3 retq
400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
40054e: 00 00
As can be seen, the new __get_order() function is simpler than the
old_get_order() function.
Signed-off-by: David Howells <dhowells@redhat.com>
Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------
1 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index 76e9687..e0fb4bf 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -4,6 +4,25 @@
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
+#include <linux/log2.h>
+
+/*
+ * Runtime evaluation of get_order()
+ */
+static inline __attribute_const__
+int __get_order(unsigned long size)
+{
+ int order;
+
+ size--;
+ size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+ order = fls(size);
+#else
+ order = fls64(size);
+#endif
+ return order;
+}
/**
* get_order - Determine the allocation order of a memory size
@@ -27,18 +46,15 @@
* This function may be used to initialise variables with compile time
* evaluations of constants.
*/
-static inline __attribute_const__ int get_order(unsigned long size)
-{
- int order;
-
- size = (size - 1) >> (PAGE_SHIFT - 1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
-}
+#define get_order(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
+ ((n < (1UL << PAGE_SHIFT)) ? 0 : \
+ ilog2((n) - 1) - PAGE_SHIFT + 1) \
+ ) : \
+ __get_order(n) \
+)
#endif /* __ASSEMBLY__ */
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order()
2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells
@ 2012-02-29 20:29 ` Paul Gortmaker
2012-03-01 0:31 ` Stephen Rothwell
2012-03-06 14:24 ` David Howells
0 siblings, 2 replies; 8+ messages in thread
From: Paul Gortmaker @ 2012-02-29 20:29 UTC (permalink / raw)
To: mingo, hpa, linux-kernel, arnd, dhowells, tglx
Cc: linux-tip-commits, linux-next
On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
<dhowells@redhat.com> wrote:
> Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e
> Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> Author: David Howells <dhowells@redhat.com>
> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> Committer: H. Peter Anvin <hpa@zytor.com>
> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>
> bitops: Optimise get_order()
This is causing build failures on non-x86 in linux next according to git bisect.
Here is one example:
http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/
Paul.
--
>
> Optimise get_order() to use bit scanning instructions if such exist rather than
> a loop. Also, make it possible to use get_order() in static initialisations
> too by building it on top of ilog2() in the constant parameter case.
>
> This has been tested for i386 and x86_64 using the following userspace program,
> and for FRV by making appropriate substitutions for fls() and fls64(). It will
> abort if the case for get_order() deviates from the original except for the
> order of 0, for which get_order() produces an undefined result. This program
> tests both dynamic and static parameters.
>
> #include <stdlib.h>
> #include <stdio.h>
>
> #ifdef __x86_64__
> #define BITS_PER_LONG 64
> #else
> #define BITS_PER_LONG 32
> #endif
>
> #define PAGE_SHIFT 12
>
> typedef unsigned long long __u64, u64;
> typedef unsigned int __u32, u32;
> #define noinline __attribute__((noinline))
>
> static inline int fls(int x)
> {
> int bitpos = -1;
>
> asm("bsrl %1,%0"
> : "+r" (bitpos)
> : "rm" (x));
> return bitpos + 1;
> }
>
> static __always_inline int fls64(__u64 x)
> {
> #if BITS_PER_LONG == 64
> long bitpos = -1;
>
> asm("bsrq %1,%0"
> : "+r" (bitpos)
> : "rm" (x));
> return bitpos + 1;
> #else
> __u32 h = x >> 32, l = x;
> int bitpos = -1;
>
> asm("bsrl %1,%0 \n"
> "subl %2,%0 \n"
> "bsrl %3,%0 \n"
> : "+r" (bitpos)
> : "rm" (l), "i"(32), "rm" (h));
>
> return bitpos + 33;
> #endif
> }
>
> static inline __attribute__((const))
> int __ilog2_u32(u32 n)
> {
> return fls(n) - 1;
> }
>
> static inline __attribute__((const))
> int __ilog2_u64(u64 n)
> {
> return fls64(n) - 1;
> }
>
> extern __attribute__((const, noreturn))
> int ____ilog2_NaN(void);
>
> #define ilog2(n) \
> ( \
> __builtin_constant_p(n) ? ( \
> (n) < 1 ? ____ilog2_NaN() : \
> (n) & (1ULL << 63) ? 63 : \
> (n) & (1ULL << 62) ? 62 : \
> (n) & (1ULL << 61) ? 61 : \
> (n) & (1ULL << 60) ? 60 : \
> (n) & (1ULL << 59) ? 59 : \
> (n) & (1ULL << 58) ? 58 : \
> (n) & (1ULL << 57) ? 57 : \
> (n) & (1ULL << 56) ? 56 : \
> (n) & (1ULL << 55) ? 55 : \
> (n) & (1ULL << 54) ? 54 : \
> (n) & (1ULL << 53) ? 53 : \
> (n) & (1ULL << 52) ? 52 : \
> (n) & (1ULL << 51) ? 51 : \
> (n) & (1ULL << 50) ? 50 : \
> (n) & (1ULL << 49) ? 49 : \
> (n) & (1ULL << 48) ? 48 : \
> (n) & (1ULL << 47) ? 47 : \
> (n) & (1ULL << 46) ? 46 : \
> (n) & (1ULL << 45) ? 45 : \
> (n) & (1ULL << 44) ? 44 : \
> (n) & (1ULL << 43) ? 43 : \
> (n) & (1ULL << 42) ? 42 : \
> (n) & (1ULL << 41) ? 41 : \
> (n) & (1ULL << 40) ? 40 : \
> (n) & (1ULL << 39) ? 39 : \
> (n) & (1ULL << 38) ? 38 : \
> (n) & (1ULL << 37) ? 37 : \
> (n) & (1ULL << 36) ? 36 : \
> (n) & (1ULL << 35) ? 35 : \
> (n) & (1ULL << 34) ? 34 : \
> (n) & (1ULL << 33) ? 33 : \
> (n) & (1ULL << 32) ? 32 : \
> (n) & (1ULL << 31) ? 31 : \
> (n) & (1ULL << 30) ? 30 : \
> (n) & (1ULL << 29) ? 29 : \
> (n) & (1ULL << 28) ? 28 : \
> (n) & (1ULL << 27) ? 27 : \
> (n) & (1ULL << 26) ? 26 : \
> (n) & (1ULL << 25) ? 25 : \
> (n) & (1ULL << 24) ? 24 : \
> (n) & (1ULL << 23) ? 23 : \
> (n) & (1ULL << 22) ? 22 : \
> (n) & (1ULL << 21) ? 21 : \
> (n) & (1ULL << 20) ? 20 : \
> (n) & (1ULL << 19) ? 19 : \
> (n) & (1ULL << 18) ? 18 : \
> (n) & (1ULL << 17) ? 17 : \
> (n) & (1ULL << 16) ? 16 : \
> (n) & (1ULL << 15) ? 15 : \
> (n) & (1ULL << 14) ? 14 : \
> (n) & (1ULL << 13) ? 13 : \
> (n) & (1ULL << 12) ? 12 : \
> (n) & (1ULL << 11) ? 11 : \
> (n) & (1ULL << 10) ? 10 : \
> (n) & (1ULL << 9) ? 9 : \
> (n) & (1ULL << 8) ? 8 : \
> (n) & (1ULL << 7) ? 7 : \
> (n) & (1ULL << 6) ? 6 : \
> (n) & (1ULL << 5) ? 5 : \
> (n) & (1ULL << 4) ? 4 : \
> (n) & (1ULL << 3) ? 3 : \
> (n) & (1ULL << 2) ? 2 : \
> (n) & (1ULL << 1) ? 1 : \
> (n) & (1ULL << 0) ? 0 : \
> ____ilog2_NaN() \
> ) : \
> (sizeof(n) <= 4) ? \
> __ilog2_u32(n) : \
> __ilog2_u64(n) \
> )
>
> static noinline __attribute__((const))
> int old_get_order(unsigned long size)
> {
> int order;
>
> size = (size - 1) >> (PAGE_SHIFT - 1);
> order = -1;
> do {
> size >>= 1;
> order++;
> } while (size);
> return order;
> }
>
> static noinline __attribute__((const))
> int __get_order(unsigned long size)
> {
> int order;
> size--;
> size >>= PAGE_SHIFT;
> #if BITS_PER_LONG == 32
> order = fls(size);
> #else
> order = fls64(size);
> #endif
> return order;
> }
>
> #define get_order(n) \
> ( \
> __builtin_constant_p(n) ? ( \
> (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
> ((n < (1UL << PAGE_SHIFT)) ? 0 : \
> ilog2((n) - 1) - PAGE_SHIFT + 1) \
> ) : \
> __get_order(n) \
> )
>
> #define order(N) \
> { (1UL << N) - 1, get_order((1UL << N) - 1) }, \
> { (1UL << N), get_order((1UL << N)) }, \
> { (1UL << N) + 1, get_order((1UL << N) + 1) }
>
> struct order {
> unsigned long n, order;
> };
>
> static const struct order order_table[] = {
> order(0),
> order(1),
> order(2),
> order(3),
> order(4),
> order(5),
> order(6),
> order(7),
> order(8),
> order(9),
> order(10),
> order(11),
> order(12),
> order(13),
> order(14),
> order(15),
> order(16),
> order(17),
> order(18),
> order(19),
> order(20),
> order(21),
> order(22),
> order(23),
> order(24),
> order(25),
> order(26),
> order(27),
> order(28),
> order(29),
> order(30),
> order(31),
> #if BITS_PER_LONG == 64
> order(32),
> order(33),
> order(34),
> order(35),
> #endif
> { 0x2929 }
> };
>
> void check(int loop, unsigned long n)
> {
> unsigned long old, new;
>
> printf("[%2d]: %09lx | ", loop, n);
>
> old = old_get_order(n);
> new = get_order(n);
>
> printf("%3ld, %3ld\n", old, new);
> if (n != 0 && old != new)
> abort();
> }
>
> int main(int argc, char **argv)
> {
> const struct order *p;
> unsigned long n;
> int loop;
>
> for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) {
> n = 1UL << loop;
> check(loop, n - 1);
> check(loop, n);
> check(loop, n + 1);
> }
>
> for (p = order_table; p->n != 0x2929; p++) {
> unsigned long old, new;
>
> old = old_get_order(p->n);
> new = p->order;
> printf("%09lx\t%3ld, %3ld\n", p->n, old, new);
> if (p->n != 0 && old != new)
> abort();
> }
>
> return 0;
> }
>
> Disassembling the x86_64 version of the above code shows:
>
> 0000000000400510 <old_get_order>:
> 400510: 48 83 ef 01 sub $0x1,%rdi
> 400514: b8 ff ff ff ff mov $0xffffffff,%eax
> 400519: 48 c1 ef 0b shr $0xb,%rdi
> 40051d: 0f 1f 00 nopl (%rax)
> 400520: 83 c0 01 add $0x1,%eax
> 400523: 48 d1 ef shr %rdi
> 400526: 75 f8 jne 400520 <old_get_order+0x10>
> 400528: f3 c3 repz retq
> 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
>
> 0000000000400530 <__get_order>:
> 400530: 48 83 ef 01 sub $0x1,%rdi
> 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax
> 40053b: 48 c1 ef 0c shr $0xc,%rdi
> 40053f: 48 0f bd c7 bsr %rdi,%rax
> 400543: 83 c0 01 add $0x1,%eax
> 400546: c3 retq
> 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
> 40054e: 00 00
>
> As can be seen, the new __get_order() function is simpler than the
> old_get_order() function.
>
> Signed-off-by: David Howells <dhowells@redhat.com>
> Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk
> Acked-by: Arnd Bergmann <arnd@arndb.de>
> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
> ---
> include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------
> 1 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
> index 76e9687..e0fb4bf 100644
> --- a/include/asm-generic/getorder.h
> +++ b/include/asm-generic/getorder.h
> @@ -4,6 +4,25 @@
> #ifndef __ASSEMBLY__
>
> #include <linux/compiler.h>
> +#include <linux/log2.h>
> +
> +/*
> + * Runtime evaluation of get_order()
> + */
> +static inline __attribute_const__
> +int __get_order(unsigned long size)
> +{
> + int order;
> +
> + size--;
> + size >>= PAGE_SHIFT;
> +#if BITS_PER_LONG == 32
> + order = fls(size);
> +#else
> + order = fls64(size);
> +#endif
> + return order;
> +}
>
> /**
> * get_order - Determine the allocation order of a memory size
> @@ -27,18 +46,15 @@
> * This function may be used to initialise variables with compile time
> * evaluations of constants.
> */
> -static inline __attribute_const__ int get_order(unsigned long size)
> -{
> - int order;
> -
> - size = (size - 1) >> (PAGE_SHIFT - 1);
> - order = -1;
> - do {
> - size >>= 1;
> - order++;
> - } while (size);
> - return order;
> -}
> +#define get_order(n) \
> +( \
> + __builtin_constant_p(n) ? ( \
> + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
> + ((n < (1UL << PAGE_SHIFT)) ? 0 : \
> + ilog2((n) - 1) - PAGE_SHIFT + 1) \
> + ) : \
> + __get_order(n) \
> +)
>
> #endif /* __ASSEMBLY__ */
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
ÿôèº{.nÇ+·®+%Ëÿ±éݶ\x17¥wÿº{.nÇ+·¥{±þG«éÿ{ayº\x1dÊÚë,j\a¢f£¢·hïêÿêçz_è®\x03(éÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?¨èÚ&£ø§~á¶iOæ¬z·vØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?I¥
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order()
2012-02-29 20:29 ` Paul Gortmaker
@ 2012-03-01 0:31 ` Stephen Rothwell
2012-03-01 21:02 ` H. Peter Anvin
2012-03-06 14:24 ` David Howells
1 sibling, 1 reply; 8+ messages in thread
From: Stephen Rothwell @ 2012-03-01 0:31 UTC (permalink / raw)
To: Paul Gortmaker
Cc: mingo, hpa, linux-kernel, arnd, dhowells, tglx, linux-tip-commits,
linux-next
[-- Attachment #1: Type: text/plain, Size: 1199 bytes --]
On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker <paul.gortmaker@windriver.com> wrote:
>
> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
> <dhowells@redhat.com> wrote:
> > Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e
> > Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
> > Author: David Howells <dhowells@redhat.com>
> > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
> > Committer: H. Peter Anvin <hpa@zytor.com>
> > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
> >
> > bitops: Optimise get_order()
>
> This is causing build failures on non-x86 in linux next according to git bisect.
Presumably it needs to include linux/bitops.h (and see below).
> > +static inline __attribute_const__
> > +int __get_order(unsigned long size)
> > +{
> > + int order;
> > +
> > + size--;
> > + size >>= PAGE_SHIFT;
> > +#if BITS_PER_LONG == 32
> > + order = fls(size);
> > +#else
> > + order = fls64(size);
> > +#endif
linux/bitops.h has fls_long() that does this size test and calls the right thing.
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order()
2012-03-01 0:31 ` Stephen Rothwell
@ 2012-03-01 21:02 ` H. Peter Anvin
0 siblings, 0 replies; 8+ messages in thread
From: H. Peter Anvin @ 2012-03-01 21:02 UTC (permalink / raw)
To: Stephen Rothwell
Cc: Paul Gortmaker, mingo, linux-kernel, arnd, dhowells, tglx,
linux-tip-commits, linux-next
David, could you make a fix for this?
-hpa
On 02/29/2012 04:31 PM, Stephen Rothwell wrote:
> On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker<paul.gortmaker@windriver.com> wrote:
>>
>> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells
>> <dhowells@redhat.com> wrote:
>>> Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e
>>> Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e
>>> Author: David Howells<dhowells@redhat.com>
>>> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000
>>> Committer: H. Peter Anvin<hpa@zytor.com>
>>> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800
>>>
>>> bitops: Optimise get_order()
>>
>> This is causing build failures on non-x86 in linux next according to git bisect.
>
> Presumably it needs to include linux/bitops.h (and see below).
>
>>> +static inline __attribute_const__
>>> +int __get_order(unsigned long size)
>>> +{
>>> + int order;
>>> +
>>> + size--;
>>> + size>>= PAGE_SHIFT;
>>> +#if BITS_PER_LONG == 32
>>> + order = fls(size);
>>> +#else
>>> + order = fls64(size);
>>> +#endif
>
> linux/bitops.h has fls_long() that does this size test and calls the right thing.
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order()
2012-02-29 20:29 ` Paul Gortmaker
2012-03-01 0:31 ` Stephen Rothwell
@ 2012-03-06 14:24 ` David Howells
1 sibling, 0 replies; 8+ messages in thread
From: David Howells @ 2012-03-06 14:24 UTC (permalink / raw)
To: Stephen Rothwell
Cc: dhowells, Paul Gortmaker, mingo, hpa, linux-kernel, arnd, tglx,
linux-tip-commits, linux-next
Stephen Rothwell <sfr@canb.auug.org.au> wrote:
> Presumably it needs to include linux/bitops.h (and see below).
That won't help. It already includes that via linux/log2.h.
The error chain looks suspiciously like a circular dependency has occurred.
David
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2012-03-06 14:25 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells
2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells
2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells
2012-02-29 20:29 ` Paul Gortmaker
2012-03-01 0:31 ` Stephen Rothwell
2012-03-01 21:02 ` H. Peter Anvin
2012-03-06 14:24 ` David Howells
2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).