* [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case @ 2012-02-20 22:39 David Howells 2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells 2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells 0 siblings, 2 replies; 8+ messages in thread From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw) To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann Adjust the comment on get_order() to note that the result of passing a size of 0 results in an undefined value. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de> --- include/asm-generic/getorder.h | 23 ++++++++++++++++++++++- 1 files changed, 22 insertions(+), 1 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index 67e7245..76e9687 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -5,7 +5,28 @@ #include <linux/compiler.h> -/* Pure 2^n version of get_order */ +/** + * get_order - Determine the allocation order of a memory size + * @size: The size for which to get the order + * + * Determine the allocation order of a particular sized block of memory. This + * is on a logarithmic scale, where: + * + * 0 -> 2^0 * PAGE_SIZE and below + * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1 + * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1 + * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1 + * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1 + * ... + * + * The order returned is used to find the smallest allocation granule required + * to hold an object of the specified size. + * + * The result is undefined if the size is 0. + * + * This function may be used to initialise variables with compile time + * evaluations of constants. + */ static inline __attribute_const__ int get_order(unsigned long size) { int order; ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] Optimise get_order() 2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells @ 2012-02-20 22:39 ` David Howells 2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells 2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells 1 sibling, 1 reply; 8+ messages in thread From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw) To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann Optimise get_order() to use bit scanning instructions if such exist rather than a loop. Also, make it possible to use get_order() in static initialisations too by building it on top of ilog2() in the constant parameter case. This has been tested for i386 and x86_64 using the following userspace program, and for FRV by making appropriate substitutions for fls() and fls64(). It will abort if the case for get_order() deviates from the original except for the order of 0, for which get_order() produces an undefined result. This program tests both dynamic and static parameters. #include <stdlib.h> #include <stdio.h> #ifdef __x86_64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif #define PAGE_SHIFT 12 typedef unsigned long long __u64, u64; typedef unsigned int __u32, u32; #define noinline __attribute__((noinline)) static inline int fls(int x) { int bitpos = -1; asm("bsrl %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; } static __always_inline int fls64(__u64 x) { #if BITS_PER_LONG == 64 long bitpos = -1; asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; #else __u32 h = x >> 32, l = x; int bitpos = -1; asm("bsrl %1,%0 \n" "subl %2,%0 \n" "bsrl %3,%0 \n" : "+r" (bitpos) : "rm" (l), "i"(32), "rm" (h)); return bitpos + 33; #endif } static inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; } static inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; } extern __attribute__((const, noreturn)) int ____ilog2_NaN(void); #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 1 ? ____ilog2_NaN() : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ (n) & (1ULL << 60) ? 60 : \ (n) & (1ULL << 59) ? 59 : \ (n) & (1ULL << 58) ? 58 : \ (n) & (1ULL << 57) ? 57 : \ (n) & (1ULL << 56) ? 56 : \ (n) & (1ULL << 55) ? 55 : \ (n) & (1ULL << 54) ? 54 : \ (n) & (1ULL << 53) ? 53 : \ (n) & (1ULL << 52) ? 52 : \ (n) & (1ULL << 51) ? 51 : \ (n) & (1ULL << 50) ? 50 : \ (n) & (1ULL << 49) ? 49 : \ (n) & (1ULL << 48) ? 48 : \ (n) & (1ULL << 47) ? 47 : \ (n) & (1ULL << 46) ? 46 : \ (n) & (1ULL << 45) ? 45 : \ (n) & (1ULL << 44) ? 44 : \ (n) & (1ULL << 43) ? 43 : \ (n) & (1ULL << 42) ? 42 : \ (n) & (1ULL << 41) ? 41 : \ (n) & (1ULL << 40) ? 40 : \ (n) & (1ULL << 39) ? 39 : \ (n) & (1ULL << 38) ? 38 : \ (n) & (1ULL << 37) ? 37 : \ (n) & (1ULL << 36) ? 36 : \ (n) & (1ULL << 35) ? 35 : \ (n) & (1ULL << 34) ? 34 : \ (n) & (1ULL << 33) ? 33 : \ (n) & (1ULL << 32) ? 32 : \ (n) & (1ULL << 31) ? 31 : \ (n) & (1ULL << 30) ? 30 : \ (n) & (1ULL << 29) ? 29 : \ (n) & (1ULL << 28) ? 28 : \ (n) & (1ULL << 27) ? 27 : \ (n) & (1ULL << 26) ? 26 : \ (n) & (1ULL << 25) ? 25 : \ (n) & (1ULL << 24) ? 24 : \ (n) & (1ULL << 23) ? 23 : \ (n) & (1ULL << 22) ? 22 : \ (n) & (1ULL << 21) ? 21 : \ (n) & (1ULL << 20) ? 20 : \ (n) & (1ULL << 19) ? 19 : \ (n) & (1ULL << 18) ? 18 : \ (n) & (1ULL << 17) ? 17 : \ (n) & (1ULL << 16) ? 16 : \ (n) & (1ULL << 15) ? 15 : \ (n) & (1ULL << 14) ? 14 : \ (n) & (1ULL << 13) ? 13 : \ (n) & (1ULL << 12) ? 12 : \ (n) & (1ULL << 11) ? 11 : \ (n) & (1ULL << 10) ? 10 : \ (n) & (1ULL << 9) ? 9 : \ (n) & (1ULL << 8) ? 8 : \ (n) & (1ULL << 7) ? 7 : \ (n) & (1ULL << 6) ? 6 : \ (n) & (1ULL << 5) ? 5 : \ (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ (n) & (1ULL << 1) ? 1 : \ (n) & (1ULL << 0) ? 0 : \ ____ilog2_NaN() \ ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ ) static noinline __attribute__((const)) int old_get_order(unsigned long size) { int order; size = (size - 1) >> (PAGE_SHIFT - 1); order = -1; do { size >>= 1; order++; } while (size); return order; } static noinline __attribute__((const)) int __get_order(unsigned long size) { int order; size--; size >>= PAGE_SHIFT; #if BITS_PER_LONG == 32 order = fls(size); #else order = fls64(size); #endif return order; } #define get_order(n) \ ( \ __builtin_constant_p(n) ? ( \ (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ ((n < (1UL << PAGE_SHIFT)) ? 0 : \ ilog2((n) - 1) - PAGE_SHIFT + 1) \ ) : \ __get_order(n) \ ) #define order(N) \ { (1UL << N) - 1, get_order((1UL << N) - 1) }, \ { (1UL << N), get_order((1UL << N)) }, \ { (1UL << N) + 1, get_order((1UL << N) + 1) } struct order { unsigned long n, order; }; static const struct order order_table[] = { order(0), order(1), order(2), order(3), order(4), order(5), order(6), order(7), order(8), order(9), order(10), order(11), order(12), order(13), order(14), order(15), order(16), order(17), order(18), order(19), order(20), order(21), order(22), order(23), order(24), order(25), order(26), order(27), order(28), order(29), order(30), order(31), #if BITS_PER_LONG == 64 order(32), order(33), order(34), order(35), #endif { 0x2929 } }; void check(int loop, unsigned long n) { unsigned long old, new; printf("[%2d]: %09lx | ", loop, n); old = old_get_order(n); new = get_order(n); printf("%3ld, %3ld\n", old, new); if (n != 0 && old != new) abort(); } int main(int argc, char **argv) { const struct order *p; unsigned long n; int loop; for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) { n = 1UL << loop; check(loop, n - 1); check(loop, n); check(loop, n + 1); } for (p = order_table; p->n != 0x2929; p++) { unsigned long old, new; old = old_get_order(p->n); new = p->order; printf("%09lx\t%3ld, %3ld\n", p->n, old, new); if (p->n != 0 && old != new) abort(); } return 0; } Disassembling the x86_64 version of the above code shows: 0000000000400510 <old_get_order>: 400510: 48 83 ef 01 sub $0x1,%rdi 400514: b8 ff ff ff ff mov $0xffffffff,%eax 400519: 48 c1 ef 0b shr $0xb,%rdi 40051d: 0f 1f 00 nopl (%rax) 400520: 83 c0 01 add $0x1,%eax 400523: 48 d1 ef shr %rdi 400526: 75 f8 jne 400520 <old_get_order+0x10> 400528: f3 c3 repz retq 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 0000000000400530 <__get_order>: 400530: 48 83 ef 01 sub $0x1,%rdi 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 40053b: 48 c1 ef 0c shr $0xc,%rdi 40053f: 48 0f bd c7 bsr %rdi,%rax 400543: 83 c0 01 add $0x1,%eax 400546: c3 retq 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 40054e: 00 00 As can be seen, the new __get_order() function is simpler than the old_get_order() function. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de> --- include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------ 1 files changed, 28 insertions(+), 12 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index 76e9687..e0fb4bf 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -4,6 +4,25 @@ #ifndef __ASSEMBLY__ #include <linux/compiler.h> +#include <linux/log2.h> + +/* + * Runtime evaluation of get_order() + */ +static inline __attribute_const__ +int __get_order(unsigned long size) +{ + int order; + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + order = fls(size); +#else + order = fls64(size); +#endif + return order; +} /** * get_order - Determine the allocation order of a memory size @@ -27,18 +46,15 @@ * This function may be used to initialise variables with compile time * evaluations of constants. */ -static inline __attribute_const__ int get_order(unsigned long size) -{ - int order; - - size = (size - 1) >> (PAGE_SHIFT - 1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} +#define get_order(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ + ((n < (1UL << PAGE_SHIFT)) ? 0 : \ + ilog2((n) - 1) - PAGE_SHIFT + 1) \ + ) : \ + __get_order(n) \ +) #endif /* __ASSEMBLY__ */ ^ permalink raw reply related [flat|nested] 8+ messages in thread
* [tip:x86/asm] bitops: Optimise get_order() 2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells @ 2012-02-20 23:20 ` tip-bot for David Howells 2012-02-29 20:29 ` Paul Gortmaker 0 siblings, 1 reply; 8+ messages in thread From: tip-bot for David Howells @ 2012-02-20 23:20 UTC (permalink / raw) To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e Author: David Howells <dhowells@redhat.com> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 Committer: H. Peter Anvin <hpa@zytor.com> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 bitops: Optimise get_order() Optimise get_order() to use bit scanning instructions if such exist rather than a loop. Also, make it possible to use get_order() in static initialisations too by building it on top of ilog2() in the constant parameter case. This has been tested for i386 and x86_64 using the following userspace program, and for FRV by making appropriate substitutions for fls() and fls64(). It will abort if the case for get_order() deviates from the original except for the order of 0, for which get_order() produces an undefined result. This program tests both dynamic and static parameters. #include <stdlib.h> #include <stdio.h> #ifdef __x86_64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif #define PAGE_SHIFT 12 typedef unsigned long long __u64, u64; typedef unsigned int __u32, u32; #define noinline __attribute__((noinline)) static inline int fls(int x) { int bitpos = -1; asm("bsrl %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; } static __always_inline int fls64(__u64 x) { #if BITS_PER_LONG == 64 long bitpos = -1; asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; #else __u32 h = x >> 32, l = x; int bitpos = -1; asm("bsrl %1,%0 \n" "subl %2,%0 \n" "bsrl %3,%0 \n" : "+r" (bitpos) : "rm" (l), "i"(32), "rm" (h)); return bitpos + 33; #endif } static inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; } static inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; } extern __attribute__((const, noreturn)) int ____ilog2_NaN(void); #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 1 ? ____ilog2_NaN() : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ (n) & (1ULL << 60) ? 60 : \ (n) & (1ULL << 59) ? 59 : \ (n) & (1ULL << 58) ? 58 : \ (n) & (1ULL << 57) ? 57 : \ (n) & (1ULL << 56) ? 56 : \ (n) & (1ULL << 55) ? 55 : \ (n) & (1ULL << 54) ? 54 : \ (n) & (1ULL << 53) ? 53 : \ (n) & (1ULL << 52) ? 52 : \ (n) & (1ULL << 51) ? 51 : \ (n) & (1ULL << 50) ? 50 : \ (n) & (1ULL << 49) ? 49 : \ (n) & (1ULL << 48) ? 48 : \ (n) & (1ULL << 47) ? 47 : \ (n) & (1ULL << 46) ? 46 : \ (n) & (1ULL << 45) ? 45 : \ (n) & (1ULL << 44) ? 44 : \ (n) & (1ULL << 43) ? 43 : \ (n) & (1ULL << 42) ? 42 : \ (n) & (1ULL << 41) ? 41 : \ (n) & (1ULL << 40) ? 40 : \ (n) & (1ULL << 39) ? 39 : \ (n) & (1ULL << 38) ? 38 : \ (n) & (1ULL << 37) ? 37 : \ (n) & (1ULL << 36) ? 36 : \ (n) & (1ULL << 35) ? 35 : \ (n) & (1ULL << 34) ? 34 : \ (n) & (1ULL << 33) ? 33 : \ (n) & (1ULL << 32) ? 32 : \ (n) & (1ULL << 31) ? 31 : \ (n) & (1ULL << 30) ? 30 : \ (n) & (1ULL << 29) ? 29 : \ (n) & (1ULL << 28) ? 28 : \ (n) & (1ULL << 27) ? 27 : \ (n) & (1ULL << 26) ? 26 : \ (n) & (1ULL << 25) ? 25 : \ (n) & (1ULL << 24) ? 24 : \ (n) & (1ULL << 23) ? 23 : \ (n) & (1ULL << 22) ? 22 : \ (n) & (1ULL << 21) ? 21 : \ (n) & (1ULL << 20) ? 20 : \ (n) & (1ULL << 19) ? 19 : \ (n) & (1ULL << 18) ? 18 : \ (n) & (1ULL << 17) ? 17 : \ (n) & (1ULL << 16) ? 16 : \ (n) & (1ULL << 15) ? 15 : \ (n) & (1ULL << 14) ? 14 : \ (n) & (1ULL << 13) ? 13 : \ (n) & (1ULL << 12) ? 12 : \ (n) & (1ULL << 11) ? 11 : \ (n) & (1ULL << 10) ? 10 : \ (n) & (1ULL << 9) ? 9 : \ (n) & (1ULL << 8) ? 8 : \ (n) & (1ULL << 7) ? 7 : \ (n) & (1ULL << 6) ? 6 : \ (n) & (1ULL << 5) ? 5 : \ (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ (n) & (1ULL << 1) ? 1 : \ (n) & (1ULL << 0) ? 0 : \ ____ilog2_NaN() \ ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ ) static noinline __attribute__((const)) int old_get_order(unsigned long size) { int order; size = (size - 1) >> (PAGE_SHIFT - 1); order = -1; do { size >>= 1; order++; } while (size); return order; } static noinline __attribute__((const)) int __get_order(unsigned long size) { int order; size--; size >>= PAGE_SHIFT; #if BITS_PER_LONG == 32 order = fls(size); #else order = fls64(size); #endif return order; } #define get_order(n) \ ( \ __builtin_constant_p(n) ? ( \ (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ ((n < (1UL << PAGE_SHIFT)) ? 0 : \ ilog2((n) - 1) - PAGE_SHIFT + 1) \ ) : \ __get_order(n) \ ) #define order(N) \ { (1UL << N) - 1, get_order((1UL << N) - 1) }, \ { (1UL << N), get_order((1UL << N)) }, \ { (1UL << N) + 1, get_order((1UL << N) + 1) } struct order { unsigned long n, order; }; static const struct order order_table[] = { order(0), order(1), order(2), order(3), order(4), order(5), order(6), order(7), order(8), order(9), order(10), order(11), order(12), order(13), order(14), order(15), order(16), order(17), order(18), order(19), order(20), order(21), order(22), order(23), order(24), order(25), order(26), order(27), order(28), order(29), order(30), order(31), #if BITS_PER_LONG == 64 order(32), order(33), order(34), order(35), #endif { 0x2929 } }; void check(int loop, unsigned long n) { unsigned long old, new; printf("[%2d]: %09lx | ", loop, n); old = old_get_order(n); new = get_order(n); printf("%3ld, %3ld\n", old, new); if (n != 0 && old != new) abort(); } int main(int argc, char **argv) { const struct order *p; unsigned long n; int loop; for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) { n = 1UL << loop; check(loop, n - 1); check(loop, n); check(loop, n + 1); } for (p = order_table; p->n != 0x2929; p++) { unsigned long old, new; old = old_get_order(p->n); new = p->order; printf("%09lx\t%3ld, %3ld\n", p->n, old, new); if (p->n != 0 && old != new) abort(); } return 0; } Disassembling the x86_64 version of the above code shows: 0000000000400510 <old_get_order>: 400510: 48 83 ef 01 sub $0x1,%rdi 400514: b8 ff ff ff ff mov $0xffffffff,%eax 400519: 48 c1 ef 0b shr $0xb,%rdi 40051d: 0f 1f 00 nopl (%rax) 400520: 83 c0 01 add $0x1,%eax 400523: 48 d1 ef shr %rdi 400526: 75 f8 jne 400520 <old_get_order+0x10> 400528: f3 c3 repz retq 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 0000000000400530 <__get_order>: 400530: 48 83 ef 01 sub $0x1,%rdi 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 40053b: 48 c1 ef 0c shr $0xc,%rdi 40053f: 48 0f bd c7 bsr %rdi,%rax 400543: 83 c0 01 add $0x1,%eax 400546: c3 retq 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 40054e: 00 00 As can be seen, the new __get_order() function is simpler than the old_get_order() function. Signed-off-by: David Howells <dhowells@redhat.com> Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: H. Peter Anvin <hpa@zytor.com> --- include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------ 1 files changed, 28 insertions(+), 12 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index 76e9687..e0fb4bf 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -4,6 +4,25 @@ #ifndef __ASSEMBLY__ #include <linux/compiler.h> +#include <linux/log2.h> + +/* + * Runtime evaluation of get_order() + */ +static inline __attribute_const__ +int __get_order(unsigned long size) +{ + int order; + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + order = fls(size); +#else + order = fls64(size); +#endif + return order; +} /** * get_order - Determine the allocation order of a memory size @@ -27,18 +46,15 @@ * This function may be used to initialise variables with compile time * evaluations of constants. */ -static inline __attribute_const__ int get_order(unsigned long size) -{ - int order; - - size = (size - 1) >> (PAGE_SHIFT - 1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} +#define get_order(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ + ((n < (1UL << PAGE_SHIFT)) ? 0 : \ + ilog2((n) - 1) - PAGE_SHIFT + 1) \ + ) : \ + __get_order(n) \ +) #endif /* __ASSEMBLY__ */ ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order() 2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells @ 2012-02-29 20:29 ` Paul Gortmaker 2012-03-01 0:31 ` Stephen Rothwell 2012-03-06 14:24 ` David Howells 0 siblings, 2 replies; 8+ messages in thread From: Paul Gortmaker @ 2012-02-29 20:29 UTC (permalink / raw) To: mingo, hpa, linux-kernel, arnd, dhowells, tglx Cc: linux-tip-commits, linux-next On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells <dhowells@redhat.com> wrote: > Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e > Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e > Author: David Howells <dhowells@redhat.com> > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 > Committer: H. Peter Anvin <hpa@zytor.com> > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 > > bitops: Optimise get_order() This is causing build failures on non-x86 in linux next according to git bisect. Here is one example: http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/ Paul. -- > > Optimise get_order() to use bit scanning instructions if such exist rather than > a loop. Also, make it possible to use get_order() in static initialisations > too by building it on top of ilog2() in the constant parameter case. > > This has been tested for i386 and x86_64 using the following userspace program, > and for FRV by making appropriate substitutions for fls() and fls64(). It will > abort if the case for get_order() deviates from the original except for the > order of 0, for which get_order() produces an undefined result. This program > tests both dynamic and static parameters. > > #include <stdlib.h> > #include <stdio.h> > > #ifdef __x86_64__ > #define BITS_PER_LONG 64 > #else > #define BITS_PER_LONG 32 > #endif > > #define PAGE_SHIFT 12 > > typedef unsigned long long __u64, u64; > typedef unsigned int __u32, u32; > #define noinline __attribute__((noinline)) > > static inline int fls(int x) > { > int bitpos = -1; > > asm("bsrl %1,%0" > : "+r" (bitpos) > : "rm" (x)); > return bitpos + 1; > } > > static __always_inline int fls64(__u64 x) > { > #if BITS_PER_LONG == 64 > long bitpos = -1; > > asm("bsrq %1,%0" > : "+r" (bitpos) > : "rm" (x)); > return bitpos + 1; > #else > __u32 h = x >> 32, l = x; > int bitpos = -1; > > asm("bsrl %1,%0 \n" > "subl %2,%0 \n" > "bsrl %3,%0 \n" > : "+r" (bitpos) > : "rm" (l), "i"(32), "rm" (h)); > > return bitpos + 33; > #endif > } > > static inline __attribute__((const)) > int __ilog2_u32(u32 n) > { > return fls(n) - 1; > } > > static inline __attribute__((const)) > int __ilog2_u64(u64 n) > { > return fls64(n) - 1; > } > > extern __attribute__((const, noreturn)) > int ____ilog2_NaN(void); > > #define ilog2(n) \ > ( \ > __builtin_constant_p(n) ? ( \ > (n) < 1 ? ____ilog2_NaN() : \ > (n) & (1ULL << 63) ? 63 : \ > (n) & (1ULL << 62) ? 62 : \ > (n) & (1ULL << 61) ? 61 : \ > (n) & (1ULL << 60) ? 60 : \ > (n) & (1ULL << 59) ? 59 : \ > (n) & (1ULL << 58) ? 58 : \ > (n) & (1ULL << 57) ? 57 : \ > (n) & (1ULL << 56) ? 56 : \ > (n) & (1ULL << 55) ? 55 : \ > (n) & (1ULL << 54) ? 54 : \ > (n) & (1ULL << 53) ? 53 : \ > (n) & (1ULL << 52) ? 52 : \ > (n) & (1ULL << 51) ? 51 : \ > (n) & (1ULL << 50) ? 50 : \ > (n) & (1ULL << 49) ? 49 : \ > (n) & (1ULL << 48) ? 48 : \ > (n) & (1ULL << 47) ? 47 : \ > (n) & (1ULL << 46) ? 46 : \ > (n) & (1ULL << 45) ? 45 : \ > (n) & (1ULL << 44) ? 44 : \ > (n) & (1ULL << 43) ? 43 : \ > (n) & (1ULL << 42) ? 42 : \ > (n) & (1ULL << 41) ? 41 : \ > (n) & (1ULL << 40) ? 40 : \ > (n) & (1ULL << 39) ? 39 : \ > (n) & (1ULL << 38) ? 38 : \ > (n) & (1ULL << 37) ? 37 : \ > (n) & (1ULL << 36) ? 36 : \ > (n) & (1ULL << 35) ? 35 : \ > (n) & (1ULL << 34) ? 34 : \ > (n) & (1ULL << 33) ? 33 : \ > (n) & (1ULL << 32) ? 32 : \ > (n) & (1ULL << 31) ? 31 : \ > (n) & (1ULL << 30) ? 30 : \ > (n) & (1ULL << 29) ? 29 : \ > (n) & (1ULL << 28) ? 28 : \ > (n) & (1ULL << 27) ? 27 : \ > (n) & (1ULL << 26) ? 26 : \ > (n) & (1ULL << 25) ? 25 : \ > (n) & (1ULL << 24) ? 24 : \ > (n) & (1ULL << 23) ? 23 : \ > (n) & (1ULL << 22) ? 22 : \ > (n) & (1ULL << 21) ? 21 : \ > (n) & (1ULL << 20) ? 20 : \ > (n) & (1ULL << 19) ? 19 : \ > (n) & (1ULL << 18) ? 18 : \ > (n) & (1ULL << 17) ? 17 : \ > (n) & (1ULL << 16) ? 16 : \ > (n) & (1ULL << 15) ? 15 : \ > (n) & (1ULL << 14) ? 14 : \ > (n) & (1ULL << 13) ? 13 : \ > (n) & (1ULL << 12) ? 12 : \ > (n) & (1ULL << 11) ? 11 : \ > (n) & (1ULL << 10) ? 10 : \ > (n) & (1ULL << 9) ? 9 : \ > (n) & (1ULL << 8) ? 8 : \ > (n) & (1ULL << 7) ? 7 : \ > (n) & (1ULL << 6) ? 6 : \ > (n) & (1ULL << 5) ? 5 : \ > (n) & (1ULL << 4) ? 4 : \ > (n) & (1ULL << 3) ? 3 : \ > (n) & (1ULL << 2) ? 2 : \ > (n) & (1ULL << 1) ? 1 : \ > (n) & (1ULL << 0) ? 0 : \ > ____ilog2_NaN() \ > ) : \ > (sizeof(n) <= 4) ? \ > __ilog2_u32(n) : \ > __ilog2_u64(n) \ > ) > > static noinline __attribute__((const)) > int old_get_order(unsigned long size) > { > int order; > > size = (size - 1) >> (PAGE_SHIFT - 1); > order = -1; > do { > size >>= 1; > order++; > } while (size); > return order; > } > > static noinline __attribute__((const)) > int __get_order(unsigned long size) > { > int order; > size--; > size >>= PAGE_SHIFT; > #if BITS_PER_LONG == 32 > order = fls(size); > #else > order = fls64(size); > #endif > return order; > } > > #define get_order(n) \ > ( \ > __builtin_constant_p(n) ? ( \ > (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ > ((n < (1UL << PAGE_SHIFT)) ? 0 : \ > ilog2((n) - 1) - PAGE_SHIFT + 1) \ > ) : \ > __get_order(n) \ > ) > > #define order(N) \ > { (1UL << N) - 1, get_order((1UL << N) - 1) }, \ > { (1UL << N), get_order((1UL << N)) }, \ > { (1UL << N) + 1, get_order((1UL << N) + 1) } > > struct order { > unsigned long n, order; > }; > > static const struct order order_table[] = { > order(0), > order(1), > order(2), > order(3), > order(4), > order(5), > order(6), > order(7), > order(8), > order(9), > order(10), > order(11), > order(12), > order(13), > order(14), > order(15), > order(16), > order(17), > order(18), > order(19), > order(20), > order(21), > order(22), > order(23), > order(24), > order(25), > order(26), > order(27), > order(28), > order(29), > order(30), > order(31), > #if BITS_PER_LONG == 64 > order(32), > order(33), > order(34), > order(35), > #endif > { 0x2929 } > }; > > void check(int loop, unsigned long n) > { > unsigned long old, new; > > printf("[%2d]: %09lx | ", loop, n); > > old = old_get_order(n); > new = get_order(n); > > printf("%3ld, %3ld\n", old, new); > if (n != 0 && old != new) > abort(); > } > > int main(int argc, char **argv) > { > const struct order *p; > unsigned long n; > int loop; > > for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) { > n = 1UL << loop; > check(loop, n - 1); > check(loop, n); > check(loop, n + 1); > } > > for (p = order_table; p->n != 0x2929; p++) { > unsigned long old, new; > > old = old_get_order(p->n); > new = p->order; > printf("%09lx\t%3ld, %3ld\n", p->n, old, new); > if (p->n != 0 && old != new) > abort(); > } > > return 0; > } > > Disassembling the x86_64 version of the above code shows: > > 0000000000400510 <old_get_order>: > 400510: 48 83 ef 01 sub $0x1,%rdi > 400514: b8 ff ff ff ff mov $0xffffffff,%eax > 400519: 48 c1 ef 0b shr $0xb,%rdi > 40051d: 0f 1f 00 nopl (%rax) > 400520: 83 c0 01 add $0x1,%eax > 400523: 48 d1 ef shr %rdi > 400526: 75 f8 jne 400520 <old_get_order+0x10> > 400528: f3 c3 repz retq > 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) > > 0000000000400530 <__get_order>: > 400530: 48 83 ef 01 sub $0x1,%rdi > 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax > 40053b: 48 c1 ef 0c shr $0xc,%rdi > 40053f: 48 0f bd c7 bsr %rdi,%rax > 400543: 83 c0 01 add $0x1,%eax > 400546: c3 retq > 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) > 40054e: 00 00 > > As can be seen, the new __get_order() function is simpler than the > old_get_order() function. > > Signed-off-by: David Howells <dhowells@redhat.com> > Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk > Acked-by: Arnd Bergmann <arnd@arndb.de> > Signed-off-by: H. Peter Anvin <hpa@zytor.com> > --- > include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------ > 1 files changed, 28 insertions(+), 12 deletions(-) > > diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h > index 76e9687..e0fb4bf 100644 > --- a/include/asm-generic/getorder.h > +++ b/include/asm-generic/getorder.h > @@ -4,6 +4,25 @@ > #ifndef __ASSEMBLY__ > > #include <linux/compiler.h> > +#include <linux/log2.h> > + > +/* > + * Runtime evaluation of get_order() > + */ > +static inline __attribute_const__ > +int __get_order(unsigned long size) > +{ > + int order; > + > + size--; > + size >>= PAGE_SHIFT; > +#if BITS_PER_LONG == 32 > + order = fls(size); > +#else > + order = fls64(size); > +#endif > + return order; > +} > > /** > * get_order - Determine the allocation order of a memory size > @@ -27,18 +46,15 @@ > * This function may be used to initialise variables with compile time > * evaluations of constants. > */ > -static inline __attribute_const__ int get_order(unsigned long size) > -{ > - int order; > - > - size = (size - 1) >> (PAGE_SHIFT - 1); > - order = -1; > - do { > - size >>= 1; > - order++; > - } while (size); > - return order; > -} > +#define get_order(n) \ > +( \ > + __builtin_constant_p(n) ? ( \ > + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ > + ((n < (1UL << PAGE_SHIFT)) ? 0 : \ > + ilog2((n) - 1) - PAGE_SHIFT + 1) \ > + ) : \ > + __get_order(n) \ > +) > > #endif /* __ASSEMBLY__ */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ÿôèº{.nÇ+·®+%Ëÿ±éݶ\x17¥wÿº{.nÇ+·¥{±þG«éÿ{ayº\x1dÊÚë,j\a¢f£¢·hïêÿêçz_è®\x03(éÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?¨èÚ&£ø§~á¶iOæ¬z·vØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?I¥ ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order() 2012-02-29 20:29 ` Paul Gortmaker @ 2012-03-01 0:31 ` Stephen Rothwell 2012-03-01 21:02 ` H. Peter Anvin 2012-03-06 14:24 ` David Howells 1 sibling, 1 reply; 8+ messages in thread From: Stephen Rothwell @ 2012-03-01 0:31 UTC (permalink / raw) To: Paul Gortmaker Cc: mingo, hpa, linux-kernel, arnd, dhowells, tglx, linux-tip-commits, linux-next [-- Attachment #1: Type: text/plain, Size: 1199 bytes --] On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker <paul.gortmaker@windriver.com> wrote: > > On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells > <dhowells@redhat.com> wrote: > > Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e > > Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e > > Author: David Howells <dhowells@redhat.com> > > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 > > Committer: H. Peter Anvin <hpa@zytor.com> > > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 > > > > bitops: Optimise get_order() > > This is causing build failures on non-x86 in linux next according to git bisect. Presumably it needs to include linux/bitops.h (and see below). > > +static inline __attribute_const__ > > +int __get_order(unsigned long size) > > +{ > > + int order; > > + > > + size--; > > + size >>= PAGE_SHIFT; > > +#if BITS_PER_LONG == 32 > > + order = fls(size); > > +#else > > + order = fls64(size); > > +#endif linux/bitops.h has fls_long() that does this size test and calls the right thing. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --] ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order() 2012-03-01 0:31 ` Stephen Rothwell @ 2012-03-01 21:02 ` H. Peter Anvin 0 siblings, 0 replies; 8+ messages in thread From: H. Peter Anvin @ 2012-03-01 21:02 UTC (permalink / raw) To: Stephen Rothwell Cc: Paul Gortmaker, mingo, linux-kernel, arnd, dhowells, tglx, linux-tip-commits, linux-next David, could you make a fix for this? -hpa On 02/29/2012 04:31 PM, Stephen Rothwell wrote: > On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker<paul.gortmaker@windriver.com> wrote: >> >> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells >> <dhowells@redhat.com> wrote: >>> Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e >>> Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e >>> Author: David Howells<dhowells@redhat.com> >>> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 >>> Committer: H. Peter Anvin<hpa@zytor.com> >>> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 >>> >>> bitops: Optimise get_order() >> >> This is causing build failures on non-x86 in linux next according to git bisect. > > Presumably it needs to include linux/bitops.h (and see below). > >>> +static inline __attribute_const__ >>> +int __get_order(unsigned long size) >>> +{ >>> + int order; >>> + >>> + size--; >>> + size>>= PAGE_SHIFT; >>> +#if BITS_PER_LONG == 32 >>> + order = fls(size); >>> +#else >>> + order = fls64(size); >>> +#endif > > linux/bitops.h has fls_long() that does this size test and calls the right thing. > ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order() 2012-02-29 20:29 ` Paul Gortmaker 2012-03-01 0:31 ` Stephen Rothwell @ 2012-03-06 14:24 ` David Howells 1 sibling, 0 replies; 8+ messages in thread From: David Howells @ 2012-03-06 14:24 UTC (permalink / raw) To: Stephen Rothwell Cc: dhowells, Paul Gortmaker, mingo, hpa, linux-kernel, arnd, tglx, linux-tip-commits, linux-next Stephen Rothwell <sfr@canb.auug.org.au> wrote: > Presumably it needs to include linux/bitops.h (and see below). That won't help. It already includes that via linux/log2.h. The error chain looks suspiciously like a circular dependency has occurred. David ^ permalink raw reply [flat|nested] 8+ messages in thread
* [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case 2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells 2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells @ 2012-02-20 23:19 ` tip-bot for David Howells 1 sibling, 0 replies; 8+ messages in thread From: tip-bot for David Howells @ 2012-02-20 23:19 UTC (permalink / raw) To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx Commit-ID: e0891a9816316b5e05fd5b0453ffe9fd6a56f489 Gitweb: http://git.kernel.org/tip/e0891a9816316b5e05fd5b0453ffe9fd6a56f489 Author: David Howells <dhowells@redhat.com> AuthorDate: Mon, 20 Feb 2012 22:39:18 +0000 Committer: H. Peter Anvin <hpa@zytor.com> CommitDate: Mon, 20 Feb 2012 14:46:55 -0800 bitops: Adjust the comment on get_order() to describe the size==0 case Adjust the comment on get_order() to note that the result of passing a size of 0 results in an undefined value. Signed-off-by: David Howells <dhowells@redhat.com> Link: http://lkml.kernel.org/r/20120220223917.16199.9416.stgit@warthog.procyon.org.uk Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: H. Peter Anvin <hpa@zytor.com> --- include/asm-generic/getorder.h | 23 ++++++++++++++++++++++- 1 files changed, 22 insertions(+), 1 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index 67e7245..76e9687 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -5,7 +5,28 @@ #include <linux/compiler.h> -/* Pure 2^n version of get_order */ +/** + * get_order - Determine the allocation order of a memory size + * @size: The size for which to get the order + * + * Determine the allocation order of a particular sized block of memory. This + * is on a logarithmic scale, where: + * + * 0 -> 2^0 * PAGE_SIZE and below + * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1 + * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1 + * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1 + * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1 + * ... + * + * The order returned is used to find the smallest allocation granule required + * to hold an object of the specified size. + * + * The result is undefined if the size is 0. + * + * This function may be used to initialise variables with compile time + * evaluations of constants. + */ static inline __attribute_const__ int get_order(unsigned long size) { int order; ^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2012-03-06 14:25 UTC | newest] Thread overview: 8+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells 2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells 2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells 2012-02-29 20:29 ` Paul Gortmaker 2012-03-01 0:31 ` Stephen Rothwell 2012-03-01 21:02 ` H. Peter Anvin 2012-03-06 14:24 ` David Howells 2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).