* [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case @ 2012-02-20 22:39 David Howells 2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells 2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells 0 siblings, 2 replies; 9+ messages in thread From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw) To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann Adjust the comment on get_order() to note that the result of passing a size of 0 results in an undefined value. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de> --- include/asm-generic/getorder.h | 23 ++++++++++++++++++++++- 1 files changed, 22 insertions(+), 1 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index 67e7245..76e9687 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -5,7 +5,28 @@ #include <linux/compiler.h> -/* Pure 2^n version of get_order */ +/** + * get_order - Determine the allocation order of a memory size + * @size: The size for which to get the order + * + * Determine the allocation order of a particular sized block of memory. This + * is on a logarithmic scale, where: + * + * 0 -> 2^0 * PAGE_SIZE and below + * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1 + * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1 + * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1 + * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1 + * ... + * + * The order returned is used to find the smallest allocation granule required + * to hold an object of the specified size. + * + * The result is undefined if the size is 0. + * + * This function may be used to initialise variables with compile time + * evaluations of constants. + */ static inline __attribute_const__ int get_order(unsigned long size) { int order; ^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] Optimise get_order() 2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells @ 2012-02-20 22:39 ` David Howells 2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells 2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells 1 sibling, 1 reply; 9+ messages in thread From: David Howells @ 2012-02-20 22:39 UTC (permalink / raw) To: hpa; +Cc: x86, linux-arch, linux-kernel, David Howells, Arnd Bergmann Optimise get_order() to use bit scanning instructions if such exist rather than a loop. Also, make it possible to use get_order() in static initialisations too by building it on top of ilog2() in the constant parameter case. This has been tested for i386 and x86_64 using the following userspace program, and for FRV by making appropriate substitutions for fls() and fls64(). It will abort if the case for get_order() deviates from the original except for the order of 0, for which get_order() produces an undefined result. This program tests both dynamic and static parameters. #include <stdlib.h> #include <stdio.h> #ifdef __x86_64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif #define PAGE_SHIFT 12 typedef unsigned long long __u64, u64; typedef unsigned int __u32, u32; #define noinline __attribute__((noinline)) static inline int fls(int x) { int bitpos = -1; asm("bsrl %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; } static __always_inline int fls64(__u64 x) { #if BITS_PER_LONG == 64 long bitpos = -1; asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; #else __u32 h = x >> 32, l = x; int bitpos = -1; asm("bsrl %1,%0 \n" "subl %2,%0 \n" "bsrl %3,%0 \n" : "+r" (bitpos) : "rm" (l), "i"(32), "rm" (h)); return bitpos + 33; #endif } static inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; } static inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; } extern __attribute__((const, noreturn)) int ____ilog2_NaN(void); #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 1 ? ____ilog2_NaN() : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ (n) & (1ULL << 60) ? 60 : \ (n) & (1ULL << 59) ? 59 : \ (n) & (1ULL << 58) ? 58 : \ (n) & (1ULL << 57) ? 57 : \ (n) & (1ULL << 56) ? 56 : \ (n) & (1ULL << 55) ? 55 : \ (n) & (1ULL << 54) ? 54 : \ (n) & (1ULL << 53) ? 53 : \ (n) & (1ULL << 52) ? 52 : \ (n) & (1ULL << 51) ? 51 : \ (n) & (1ULL << 50) ? 50 : \ (n) & (1ULL << 49) ? 49 : \ (n) & (1ULL << 48) ? 48 : \ (n) & (1ULL << 47) ? 47 : \ (n) & (1ULL << 46) ? 46 : \ (n) & (1ULL << 45) ? 45 : \ (n) & (1ULL << 44) ? 44 : \ (n) & (1ULL << 43) ? 43 : \ (n) & (1ULL << 42) ? 42 : \ (n) & (1ULL << 41) ? 41 : \ (n) & (1ULL << 40) ? 40 : \ (n) & (1ULL << 39) ? 39 : \ (n) & (1ULL << 38) ? 38 : \ (n) & (1ULL << 37) ? 37 : \ (n) & (1ULL << 36) ? 36 : \ (n) & (1ULL << 35) ? 35 : \ (n) & (1ULL << 34) ? 34 : \ (n) & (1ULL << 33) ? 33 : \ (n) & (1ULL << 32) ? 32 : \ (n) & (1ULL << 31) ? 31 : \ (n) & (1ULL << 30) ? 30 : \ (n) & (1ULL << 29) ? 29 : \ (n) & (1ULL << 28) ? 28 : \ (n) & (1ULL << 27) ? 27 : \ (n) & (1ULL << 26) ? 26 : \ (n) & (1ULL << 25) ? 25 : \ (n) & (1ULL << 24) ? 24 : \ (n) & (1ULL << 23) ? 23 : \ (n) & (1ULL << 22) ? 22 : \ (n) & (1ULL << 21) ? 21 : \ (n) & (1ULL << 20) ? 20 : \ (n) & (1ULL << 19) ? 19 : \ (n) & (1ULL << 18) ? 18 : \ (n) & (1ULL << 17) ? 17 : \ (n) & (1ULL << 16) ? 16 : \ (n) & (1ULL << 15) ? 15 : \ (n) & (1ULL << 14) ? 14 : \ (n) & (1ULL << 13) ? 13 : \ (n) & (1ULL << 12) ? 12 : \ (n) & (1ULL << 11) ? 11 : \ (n) & (1ULL << 10) ? 10 : \ (n) & (1ULL << 9) ? 9 : \ (n) & (1ULL << 8) ? 8 : \ (n) & (1ULL << 7) ? 7 : \ (n) & (1ULL << 6) ? 6 : \ (n) & (1ULL << 5) ? 5 : \ (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ (n) & (1ULL << 1) ? 1 : \ (n) & (1ULL << 0) ? 0 : \ ____ilog2_NaN() \ ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ ) static noinline __attribute__((const)) int old_get_order(unsigned long size) { int order; size = (size - 1) >> (PAGE_SHIFT - 1); order = -1; do { size >>= 1; order++; } while (size); return order; } static noinline __attribute__((const)) int __get_order(unsigned long size) { int order; size--; size >>= PAGE_SHIFT; #if BITS_PER_LONG == 32 order = fls(size); #else order = fls64(size); #endif return order; } #define get_order(n) \ ( \ __builtin_constant_p(n) ? ( \ (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ ((n < (1UL << PAGE_SHIFT)) ? 0 : \ ilog2((n) - 1) - PAGE_SHIFT + 1) \ ) : \ __get_order(n) \ ) #define order(N) \ { (1UL << N) - 1, get_order((1UL << N) - 1) }, \ { (1UL << N), get_order((1UL << N)) }, \ { (1UL << N) + 1, get_order((1UL << N) + 1) } struct order { unsigned long n, order; }; static const struct order order_table[] = { order(0), order(1), order(2), order(3), order(4), order(5), order(6), order(7), order(8), order(9), order(10), order(11), order(12), order(13), order(14), order(15), order(16), order(17), order(18), order(19), order(20), order(21), order(22), order(23), order(24), order(25), order(26), order(27), order(28), order(29), order(30), order(31), #if BITS_PER_LONG == 64 order(32), order(33), order(34), order(35), #endif { 0x2929 } }; void check(int loop, unsigned long n) { unsigned long old, new; printf("[%2d]: %09lx | ", loop, n); old = old_get_order(n); new = get_order(n); printf("%3ld, %3ld\n", old, new); if (n != 0 && old != new) abort(); } int main(int argc, char **argv) { const struct order *p; unsigned long n; int loop; for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) { n = 1UL << loop; check(loop, n - 1); check(loop, n); check(loop, n + 1); } for (p = order_table; p->n != 0x2929; p++) { unsigned long old, new; old = old_get_order(p->n); new = p->order; printf("%09lx\t%3ld, %3ld\n", p->n, old, new); if (p->n != 0 && old != new) abort(); } return 0; } Disassembling the x86_64 version of the above code shows: 0000000000400510 <old_get_order>: 400510: 48 83 ef 01 sub $0x1,%rdi 400514: b8 ff ff ff ff mov $0xffffffff,%eax 400519: 48 c1 ef 0b shr $0xb,%rdi 40051d: 0f 1f 00 nopl (%rax) 400520: 83 c0 01 add $0x1,%eax 400523: 48 d1 ef shr %rdi 400526: 75 f8 jne 400520 <old_get_order+0x10> 400528: f3 c3 repz retq 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 0000000000400530 <__get_order>: 400530: 48 83 ef 01 sub $0x1,%rdi 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 40053b: 48 c1 ef 0c shr $0xc,%rdi 40053f: 48 0f bd c7 bsr %rdi,%rax 400543: 83 c0 01 add $0x1,%eax 400546: c3 retq 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 40054e: 00 00 As can be seen, the new __get_order() function is simpler than the old_get_order() function. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de> --- include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------ 1 files changed, 28 insertions(+), 12 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index 76e9687..e0fb4bf 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -4,6 +4,25 @@ #ifndef __ASSEMBLY__ #include <linux/compiler.h> +#include <linux/log2.h> + +/* + * Runtime evaluation of get_order() + */ +static inline __attribute_const__ +int __get_order(unsigned long size) +{ + int order; + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + order = fls(size); +#else + order = fls64(size); +#endif + return order; +} /** * get_order - Determine the allocation order of a memory size @@ -27,18 +46,15 @@ * This function may be used to initialise variables with compile time * evaluations of constants. */ -static inline __attribute_const__ int get_order(unsigned long size) -{ - int order; - - size = (size - 1) >> (PAGE_SHIFT - 1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} +#define get_order(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ + ((n < (1UL << PAGE_SHIFT)) ? 0 : \ + ilog2((n) - 1) - PAGE_SHIFT + 1) \ + ) : \ + __get_order(n) \ +) #endif /* __ASSEMBLY__ */ ^ permalink raw reply related [flat|nested] 9+ messages in thread
* [tip:x86/asm] bitops: Optimise get_order() 2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells @ 2012-02-20 23:20 ` tip-bot for David Howells 2012-02-29 20:29 ` Paul Gortmaker 0 siblings, 1 reply; 9+ messages in thread From: tip-bot for David Howells @ 2012-02-20 23:20 UTC (permalink / raw) To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e Author: David Howells <dhowells@redhat.com> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 Committer: H. Peter Anvin <hpa@zytor.com> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 bitops: Optimise get_order() Optimise get_order() to use bit scanning instructions if such exist rather than a loop. Also, make it possible to use get_order() in static initialisations too by building it on top of ilog2() in the constant parameter case. This has been tested for i386 and x86_64 using the following userspace program, and for FRV by making appropriate substitutions for fls() and fls64(). It will abort if the case for get_order() deviates from the original except for the order of 0, for which get_order() produces an undefined result. This program tests both dynamic and static parameters. #include <stdlib.h> #include <stdio.h> #ifdef __x86_64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif #define PAGE_SHIFT 12 typedef unsigned long long __u64, u64; typedef unsigned int __u32, u32; #define noinline __attribute__((noinline)) static inline int fls(int x) { int bitpos = -1; asm("bsrl %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; } static __always_inline int fls64(__u64 x) { #if BITS_PER_LONG == 64 long bitpos = -1; asm("bsrq %1,%0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; #else __u32 h = x >> 32, l = x; int bitpos = -1; asm("bsrl %1,%0 \n" "subl %2,%0 \n" "bsrl %3,%0 \n" : "+r" (bitpos) : "rm" (l), "i"(32), "rm" (h)); return bitpos + 33; #endif } static inline __attribute__((const)) int __ilog2_u32(u32 n) { return fls(n) - 1; } static inline __attribute__((const)) int __ilog2_u64(u64 n) { return fls64(n) - 1; } extern __attribute__((const, noreturn)) int ____ilog2_NaN(void); #define ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 1 ? ____ilog2_NaN() : \ (n) & (1ULL << 63) ? 63 : \ (n) & (1ULL << 62) ? 62 : \ (n) & (1ULL << 61) ? 61 : \ (n) & (1ULL << 60) ? 60 : \ (n) & (1ULL << 59) ? 59 : \ (n) & (1ULL << 58) ? 58 : \ (n) & (1ULL << 57) ? 57 : \ (n) & (1ULL << 56) ? 56 : \ (n) & (1ULL << 55) ? 55 : \ (n) & (1ULL << 54) ? 54 : \ (n) & (1ULL << 53) ? 53 : \ (n) & (1ULL << 52) ? 52 : \ (n) & (1ULL << 51) ? 51 : \ (n) & (1ULL << 50) ? 50 : \ (n) & (1ULL << 49) ? 49 : \ (n) & (1ULL << 48) ? 48 : \ (n) & (1ULL << 47) ? 47 : \ (n) & (1ULL << 46) ? 46 : \ (n) & (1ULL << 45) ? 45 : \ (n) & (1ULL << 44) ? 44 : \ (n) & (1ULL << 43) ? 43 : \ (n) & (1ULL << 42) ? 42 : \ (n) & (1ULL << 41) ? 41 : \ (n) & (1ULL << 40) ? 40 : \ (n) & (1ULL << 39) ? 39 : \ (n) & (1ULL << 38) ? 38 : \ (n) & (1ULL << 37) ? 37 : \ (n) & (1ULL << 36) ? 36 : \ (n) & (1ULL << 35) ? 35 : \ (n) & (1ULL << 34) ? 34 : \ (n) & (1ULL << 33) ? 33 : \ (n) & (1ULL << 32) ? 32 : \ (n) & (1ULL << 31) ? 31 : \ (n) & (1ULL << 30) ? 30 : \ (n) & (1ULL << 29) ? 29 : \ (n) & (1ULL << 28) ? 28 : \ (n) & (1ULL << 27) ? 27 : \ (n) & (1ULL << 26) ? 26 : \ (n) & (1ULL << 25) ? 25 : \ (n) & (1ULL << 24) ? 24 : \ (n) & (1ULL << 23) ? 23 : \ (n) & (1ULL << 22) ? 22 : \ (n) & (1ULL << 21) ? 21 : \ (n) & (1ULL << 20) ? 20 : \ (n) & (1ULL << 19) ? 19 : \ (n) & (1ULL << 18) ? 18 : \ (n) & (1ULL << 17) ? 17 : \ (n) & (1ULL << 16) ? 16 : \ (n) & (1ULL << 15) ? 15 : \ (n) & (1ULL << 14) ? 14 : \ (n) & (1ULL << 13) ? 13 : \ (n) & (1ULL << 12) ? 12 : \ (n) & (1ULL << 11) ? 11 : \ (n) & (1ULL << 10) ? 10 : \ (n) & (1ULL << 9) ? 9 : \ (n) & (1ULL << 8) ? 8 : \ (n) & (1ULL << 7) ? 7 : \ (n) & (1ULL << 6) ? 6 : \ (n) & (1ULL << 5) ? 5 : \ (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ (n) & (1ULL << 1) ? 1 : \ (n) & (1ULL << 0) ? 0 : \ ____ilog2_NaN() \ ) : \ (sizeof(n) <= 4) ? \ __ilog2_u32(n) : \ __ilog2_u64(n) \ ) static noinline __attribute__((const)) int old_get_order(unsigned long size) { int order; size = (size - 1) >> (PAGE_SHIFT - 1); order = -1; do { size >>= 1; order++; } while (size); return order; } static noinline __attribute__((const)) int __get_order(unsigned long size) { int order; size--; size >>= PAGE_SHIFT; #if BITS_PER_LONG == 32 order = fls(size); #else order = fls64(size); #endif return order; } #define get_order(n) \ ( \ __builtin_constant_p(n) ? ( \ (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ ((n < (1UL << PAGE_SHIFT)) ? 0 : \ ilog2((n) - 1) - PAGE_SHIFT + 1) \ ) : \ __get_order(n) \ ) #define order(N) \ { (1UL << N) - 1, get_order((1UL << N) - 1) }, \ { (1UL << N), get_order((1UL << N)) }, \ { (1UL << N) + 1, get_order((1UL << N) + 1) } struct order { unsigned long n, order; }; static const struct order order_table[] = { order(0), order(1), order(2), order(3), order(4), order(5), order(6), order(7), order(8), order(9), order(10), order(11), order(12), order(13), order(14), order(15), order(16), order(17), order(18), order(19), order(20), order(21), order(22), order(23), order(24), order(25), order(26), order(27), order(28), order(29), order(30), order(31), #if BITS_PER_LONG == 64 order(32), order(33), order(34), order(35), #endif { 0x2929 } }; void check(int loop, unsigned long n) { unsigned long old, new; printf("[%2d]: %09lx | ", loop, n); old = old_get_order(n); new = get_order(n); printf("%3ld, %3ld\n", old, new); if (n != 0 && old != new) abort(); } int main(int argc, char **argv) { const struct order *p; unsigned long n; int loop; for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) { n = 1UL << loop; check(loop, n - 1); check(loop, n); check(loop, n + 1); } for (p = order_table; p->n != 0x2929; p++) { unsigned long old, new; old = old_get_order(p->n); new = p->order; printf("%09lx\t%3ld, %3ld\n", p->n, old, new); if (p->n != 0 && old != new) abort(); } return 0; } Disassembling the x86_64 version of the above code shows: 0000000000400510 <old_get_order>: 400510: 48 83 ef 01 sub $0x1,%rdi 400514: b8 ff ff ff ff mov $0xffffffff,%eax 400519: 48 c1 ef 0b shr $0xb,%rdi 40051d: 0f 1f 00 nopl (%rax) 400520: 83 c0 01 add $0x1,%eax 400523: 48 d1 ef shr %rdi 400526: 75 f8 jne 400520 <old_get_order+0x10> 400528: f3 c3 repz retq 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 0000000000400530 <__get_order>: 400530: 48 83 ef 01 sub $0x1,%rdi 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 40053b: 48 c1 ef 0c shr $0xc,%rdi 40053f: 48 0f bd c7 bsr %rdi,%rax 400543: 83 c0 01 add $0x1,%eax 400546: c3 retq 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 40054e: 00 00 As can be seen, the new __get_order() function is simpler than the old_get_order() function. Signed-off-by: David Howells <dhowells@redhat.com> Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: H. Peter Anvin <hpa@zytor.com> --- include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------ 1 files changed, 28 insertions(+), 12 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index 76e9687..e0fb4bf 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -4,6 +4,25 @@ #ifndef __ASSEMBLY__ #include <linux/compiler.h> +#include <linux/log2.h> + +/* + * Runtime evaluation of get_order() + */ +static inline __attribute_const__ +int __get_order(unsigned long size) +{ + int order; + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + order = fls(size); +#else + order = fls64(size); +#endif + return order; +} /** * get_order - Determine the allocation order of a memory size @@ -27,18 +46,15 @@ * This function may be used to initialise variables with compile time * evaluations of constants. */ -static inline __attribute_const__ int get_order(unsigned long size) -{ - int order; - - size = (size - 1) >> (PAGE_SHIFT - 1); - order = -1; - do { - size >>= 1; - order++; - } while (size); - return order; -} +#define get_order(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ + ((n < (1UL << PAGE_SHIFT)) ? 0 : \ + ilog2((n) - 1) - PAGE_SHIFT + 1) \ + ) : \ + __get_order(n) \ +) #endif /* __ASSEMBLY__ */ ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order() 2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells @ 2012-02-29 20:29 ` Paul Gortmaker 0 siblings, 0 replies; 9+ messages in thread From: Paul Gortmaker @ 2012-02-29 20:29 UTC (permalink / raw) To: mingo, hpa, linux-kernel, arnd, dhowells, tglx Cc: linux-tip-commits, linux-next On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells <dhowells@redhat.com> wrote: > Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e > Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e > Author: David Howells <dhowells@redhat.com> > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 > Committer: H. Peter Anvin <hpa@zytor.com> > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 > > bitops: Optimise get_order() This is causing build failures on non-x86 in linux next according to git bisect. Here is one example: http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/ Paul. -- > > Optimise get_order() to use bit scanning instructions if such exist rather than > a loop. Also, make it possible to use get_order() in static initialisations > too by building it on top of ilog2() in the constant parameter case. > > This has been tested for i386 and x86_64 using the following userspace program, > and for FRV by making appropriate substitutions for fls() and fls64(). It will > abort if the case for get_order() deviates from the original except for the > order of 0, for which get_order() produces an undefined result. This program > tests both dynamic and static parameters. > > #include <stdlib.h> > #include <stdio.h> > > #ifdef __x86_64__ > #define BITS_PER_LONG 64 > #else > #define BITS_PER_LONG 32 > #endif > > #define PAGE_SHIFT 12 > > typedef unsigned long long __u64, u64; > typedef unsigned int __u32, u32; > #define noinline __attribute__((noinline)) > > static inline int fls(int x) > { > int bitpos = -1; > > asm("bsrl %1,%0" > : "+r" (bitpos) > : "rm" (x)); > return bitpos + 1; > } > > static __always_inline int fls64(__u64 x) > { > #if BITS_PER_LONG == 64 > long bitpos = -1; > > asm("bsrq %1,%0" > : "+r" (bitpos) > : "rm" (x)); > return bitpos + 1; > #else > __u32 h = x >> 32, l = x; > int bitpos = -1; > > asm("bsrl %1,%0 \n" > "subl %2,%0 \n" > "bsrl %3,%0 \n" > : "+r" (bitpos) > : "rm" (l), "i"(32), "rm" (h)); > > return bitpos + 33; > #endif > } > > static inline __attribute__((const)) > int __ilog2_u32(u32 n) > { > return fls(n) - 1; > } > > static inline __attribute__((const)) > int __ilog2_u64(u64 n) > { > return fls64(n) - 1; > } > > extern __attribute__((const, noreturn)) > int ____ilog2_NaN(void); > > #define ilog2(n) \ > ( \ > __builtin_constant_p(n) ? ( \ > (n) < 1 ? ____ilog2_NaN() : \ > (n) & (1ULL << 63) ? 63 : \ > (n) & (1ULL << 62) ? 62 : \ > (n) & (1ULL << 61) ? 61 : \ > (n) & (1ULL << 60) ? 60 : \ > (n) & (1ULL << 59) ? 59 : \ > (n) & (1ULL << 58) ? 58 : \ > (n) & (1ULL << 57) ? 57 : \ > (n) & (1ULL << 56) ? 56 : \ > (n) & (1ULL << 55) ? 55 : \ > (n) & (1ULL << 54) ? 54 : \ > (n) & (1ULL << 53) ? 53 : \ > (n) & (1ULL << 52) ? 52 : \ > (n) & (1ULL << 51) ? 51 : \ > (n) & (1ULL << 50) ? 50 : \ > (n) & (1ULL << 49) ? 49 : \ > (n) & (1ULL << 48) ? 48 : \ > (n) & (1ULL << 47) ? 47 : \ > (n) & (1ULL << 46) ? 46 : \ > (n) & (1ULL << 45) ? 45 : \ > (n) & (1ULL << 44) ? 44 : \ > (n) & (1ULL << 43) ? 43 : \ > (n) & (1ULL << 42) ? 42 : \ > (n) & (1ULL << 41) ? 41 : \ > (n) & (1ULL << 40) ? 40 : \ > (n) & (1ULL << 39) ? 39 : \ > (n) & (1ULL << 38) ? 38 : \ > (n) & (1ULL << 37) ? 37 : \ > (n) & (1ULL << 36) ? 36 : \ > (n) & (1ULL << 35) ? 35 : \ > (n) & (1ULL << 34) ? 34 : \ > (n) & (1ULL << 33) ? 33 : \ > (n) & (1ULL << 32) ? 32 : \ > (n) & (1ULL << 31) ? 31 : \ > (n) & (1ULL << 30) ? 30 : \ > (n) & (1ULL << 29) ? 29 : \ > (n) & (1ULL << 28) ? 28 : \ > (n) & (1ULL << 27) ? 27 : \ > (n) & (1ULL << 26) ? 26 : \ > (n) & (1ULL << 25) ? 25 : \ > (n) & (1ULL << 24) ? 24 : \ > (n) & (1ULL << 23) ? 23 : \ > (n) & (1ULL << 22) ? 22 : \ > (n) & (1ULL << 21) ? 21 : \ > (n) & (1ULL << 20) ? 20 : \ > (n) & (1ULL << 19) ? 19 : \ > (n) & (1ULL << 18) ? 18 : \ > (n) & (1ULL << 17) ? 17 : \ > (n) & (1ULL << 16) ? 16 : \ > (n) & (1ULL << 15) ? 15 : \ > (n) & (1ULL << 14) ? 14 : \ > (n) & (1ULL << 13) ? 13 : \ > (n) & (1ULL << 12) ? 12 : \ > (n) & (1ULL << 11) ? 11 : \ > (n) & (1ULL << 10) ? 10 : \ > (n) & (1ULL << 9) ? 9 : \ > (n) & (1ULL << 8) ? 8 : \ > (n) & (1ULL << 7) ? 7 : \ > (n) & (1ULL << 6) ? 6 : \ > (n) & (1ULL << 5) ? 5 : \ > (n) & (1ULL << 4) ? 4 : \ > (n) & (1ULL << 3) ? 3 : \ > (n) & (1ULL << 2) ? 2 : \ > (n) & (1ULL << 1) ? 1 : \ > (n) & (1ULL << 0) ? 0 : \ > ____ilog2_NaN() \ > ) : \ > (sizeof(n) <= 4) ? \ > __ilog2_u32(n) : \ > __ilog2_u64(n) \ > ) > > static noinline __attribute__((const)) > int old_get_order(unsigned long size) > { > int order; > > size = (size - 1) >> (PAGE_SHIFT - 1); > order = -1; > do { > size >>= 1; > order++; > } while (size); > return order; > } > > static noinline __attribute__((const)) > int __get_order(unsigned long size) > { > int order; > size--; > size >>= PAGE_SHIFT; > #if BITS_PER_LONG == 32 > order = fls(size); > #else > order = fls64(size); > #endif > return order; > } > > #define get_order(n) \ > ( \ > __builtin_constant_p(n) ? ( \ > (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ > ((n < (1UL << PAGE_SHIFT)) ? 0 : \ > ilog2((n) - 1) - PAGE_SHIFT + 1) \ > ) : \ > __get_order(n) \ > ) > > #define order(N) \ > { (1UL << N) - 1, get_order((1UL << N) - 1) }, \ > { (1UL << N), get_order((1UL << N)) }, \ > { (1UL << N) + 1, get_order((1UL << N) + 1) } > > struct order { > unsigned long n, order; > }; > > static const struct order order_table[] = { > order(0), > order(1), > order(2), > order(3), > order(4), > order(5), > order(6), > order(7), > order(8), > order(9), > order(10), > order(11), > order(12), > order(13), > order(14), > order(15), > order(16), > order(17), > order(18), > order(19), > order(20), > order(21), > order(22), > order(23), > order(24), > order(25), > order(26), > order(27), > order(28), > order(29), > order(30), > order(31), > #if BITS_PER_LONG == 64 > order(32), > order(33), > order(34), > order(35), > #endif > { 0x2929 } > }; > > void check(int loop, unsigned long n) > { > unsigned long old, new; > > printf("[%2d]: %09lx | ", loop, n); > > old = old_get_order(n); > new = get_order(n); > > printf("%3ld, %3ld\n", old, new); > if (n != 0 && old != new) > abort(); > } > > int main(int argc, char **argv) > { > const struct order *p; > unsigned long n; > int loop; > > for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) { > n = 1UL << loop; > check(loop, n - 1); > check(loop, n); > check(loop, n + 1); > } > > for (p = order_table; p->n != 0x2929; p++) { > unsigned long old, new; > > old = old_get_order(p->n); > new = p->order; > printf("%09lx\t%3ld, %3ld\n", p->n, old, new); > if (p->n != 0 && old != new) > abort(); > } > > return 0; > } > > Disassembling the x86_64 version of the above code shows: > > 0000000000400510 <old_get_order>: > 400510: 48 83 ef 01 sub $0x1,%rdi > 400514: b8 ff ff ff ff mov $0xffffffff,%eax > 400519: 48 c1 ef 0b shr $0xb,%rdi > 40051d: 0f 1f 00 nopl (%rax) > 400520: 83 c0 01 add $0x1,%eax > 400523: 48 d1 ef shr %rdi > 400526: 75 f8 jne 400520 <old_get_order+0x10> > 400528: f3 c3 repz retq > 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) > > 0000000000400530 <__get_order>: > 400530: 48 83 ef 01 sub $0x1,%rdi > 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax > 40053b: 48 c1 ef 0c shr $0xc,%rdi > 40053f: 48 0f bd c7 bsr %rdi,%rax > 400543: 83 c0 01 add $0x1,%eax > 400546: c3 retq > 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) > 40054e: 00 00 > > As can be seen, the new __get_order() function is simpler than the > old_get_order() function. > > Signed-off-by: David Howells <dhowells@redhat.com> > Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk > Acked-by: Arnd Bergmann <arnd@arndb.de> > Signed-off-by: H. Peter Anvin <hpa@zytor.com> > --- > include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------ > 1 files changed, 28 insertions(+), 12 deletions(-) > > diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h > index 76e9687..e0fb4bf 100644 > --- a/include/asm-generic/getorder.h > +++ b/include/asm-generic/getorder.h > @@ -4,6 +4,25 @@ > #ifndef __ASSEMBLY__ > > #include <linux/compiler.h> > +#include <linux/log2.h> > + > +/* > + * Runtime evaluation of get_order() > + */ > +static inline __attribute_const__ > +int __get_order(unsigned long size) > +{ > + int order; > + > + size--; > + size >>= PAGE_SHIFT; > +#if BITS_PER_LONG == 32 > + order = fls(size); > +#else > + order = fls64(size); > +#endif > + return order; > +} > > /** > * get_order - Determine the allocation order of a memory size > @@ -27,18 +46,15 @@ > * This function may be used to initialise variables with compile time > * evaluations of constants. > */ > -static inline __attribute_const__ int get_order(unsigned long size) > -{ > - int order; > - > - size = (size - 1) >> (PAGE_SHIFT - 1); > - order = -1; > - do { > - size >>= 1; > - order++; > - } while (size); > - return order; > -} > +#define get_order(n) \ > +( \ > + __builtin_constant_p(n) ? ( \ > + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ > + ((n < (1UL << PAGE_SHIFT)) ? 0 : \ > + ilog2((n) - 1) - PAGE_SHIFT + 1) \ > + ) : \ > + __get_order(n) \ > +) > > #endif /* __ASSEMBLY__ */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order() @ 2012-02-29 20:29 ` Paul Gortmaker 0 siblings, 0 replies; 9+ messages in thread From: Paul Gortmaker @ 2012-02-29 20:29 UTC (permalink / raw) To: mingo, hpa, linux-kernel, arnd, dhowells, tglx Cc: linux-tip-commits, linux-next On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells <dhowells@redhat.com> wrote: > Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e > Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e > Author: David Howells <dhowells@redhat.com> > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 > Committer: H. Peter Anvin <hpa@zytor.com> > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 > > bitops: Optimise get_order() This is causing build failures on non-x86 in linux next according to git bisect. Here is one example: http://kisskb.ellerman.id.au/kisskb/buildresult/5746632/ Paul. -- > > Optimise get_order() to use bit scanning instructions if such exist rather than > a loop. Also, make it possible to use get_order() in static initialisations > too by building it on top of ilog2() in the constant parameter case. > > This has been tested for i386 and x86_64 using the following userspace program, > and for FRV by making appropriate substitutions for fls() and fls64(). It will > abort if the case for get_order() deviates from the original except for the > order of 0, for which get_order() produces an undefined result. This program > tests both dynamic and static parameters. > > #include <stdlib.h> > #include <stdio.h> > > #ifdef __x86_64__ > #define BITS_PER_LONG 64 > #else > #define BITS_PER_LONG 32 > #endif > > #define PAGE_SHIFT 12 > > typedef unsigned long long __u64, u64; > typedef unsigned int __u32, u32; > #define noinline __attribute__((noinline)) > > static inline int fls(int x) > { > int bitpos = -1; > > asm("bsrl %1,%0" > : "+r" (bitpos) > : "rm" (x)); > return bitpos + 1; > } > > static __always_inline int fls64(__u64 x) > { > #if BITS_PER_LONG == 64 > long bitpos = -1; > > asm("bsrq %1,%0" > : "+r" (bitpos) > : "rm" (x)); > return bitpos + 1; > #else > __u32 h = x >> 32, l = x; > int bitpos = -1; > > asm("bsrl %1,%0 \n" > "subl %2,%0 \n" > "bsrl %3,%0 \n" > : "+r" (bitpos) > : "rm" (l), "i"(32), "rm" (h)); > > return bitpos + 33; > #endif > } > > static inline __attribute__((const)) > int __ilog2_u32(u32 n) > { > return fls(n) - 1; > } > > static inline __attribute__((const)) > int __ilog2_u64(u64 n) > { > return fls64(n) - 1; > } > > extern __attribute__((const, noreturn)) > int ____ilog2_NaN(void); > > #define ilog2(n) \ > ( \ > __builtin_constant_p(n) ? ( \ > (n) < 1 ? ____ilog2_NaN() : \ > (n) & (1ULL << 63) ? 63 : \ > (n) & (1ULL << 62) ? 62 : \ > (n) & (1ULL << 61) ? 61 : \ > (n) & (1ULL << 60) ? 60 : \ > (n) & (1ULL << 59) ? 59 : \ > (n) & (1ULL << 58) ? 58 : \ > (n) & (1ULL << 57) ? 57 : \ > (n) & (1ULL << 56) ? 56 : \ > (n) & (1ULL << 55) ? 55 : \ > (n) & (1ULL << 54) ? 54 : \ > (n) & (1ULL << 53) ? 53 : \ > (n) & (1ULL << 52) ? 52 : \ > (n) & (1ULL << 51) ? 51 : \ > (n) & (1ULL << 50) ? 50 : \ > (n) & (1ULL << 49) ? 49 : \ > (n) & (1ULL << 48) ? 48 : \ > (n) & (1ULL << 47) ? 47 : \ > (n) & (1ULL << 46) ? 46 : \ > (n) & (1ULL << 45) ? 45 : \ > (n) & (1ULL << 44) ? 44 : \ > (n) & (1ULL << 43) ? 43 : \ > (n) & (1ULL << 42) ? 42 : \ > (n) & (1ULL << 41) ? 41 : \ > (n) & (1ULL << 40) ? 40 : \ > (n) & (1ULL << 39) ? 39 : \ > (n) & (1ULL << 38) ? 38 : \ > (n) & (1ULL << 37) ? 37 : \ > (n) & (1ULL << 36) ? 36 : \ > (n) & (1ULL << 35) ? 35 : \ > (n) & (1ULL << 34) ? 34 : \ > (n) & (1ULL << 33) ? 33 : \ > (n) & (1ULL << 32) ? 32 : \ > (n) & (1ULL << 31) ? 31 : \ > (n) & (1ULL << 30) ? 30 : \ > (n) & (1ULL << 29) ? 29 : \ > (n) & (1ULL << 28) ? 28 : \ > (n) & (1ULL << 27) ? 27 : \ > (n) & (1ULL << 26) ? 26 : \ > (n) & (1ULL << 25) ? 25 : \ > (n) & (1ULL << 24) ? 24 : \ > (n) & (1ULL << 23) ? 23 : \ > (n) & (1ULL << 22) ? 22 : \ > (n) & (1ULL << 21) ? 21 : \ > (n) & (1ULL << 20) ? 20 : \ > (n) & (1ULL << 19) ? 19 : \ > (n) & (1ULL << 18) ? 18 : \ > (n) & (1ULL << 17) ? 17 : \ > (n) & (1ULL << 16) ? 16 : \ > (n) & (1ULL << 15) ? 15 : \ > (n) & (1ULL << 14) ? 14 : \ > (n) & (1ULL << 13) ? 13 : \ > (n) & (1ULL << 12) ? 12 : \ > (n) & (1ULL << 11) ? 11 : \ > (n) & (1ULL << 10) ? 10 : \ > (n) & (1ULL << 9) ? 9 : \ > (n) & (1ULL << 8) ? 8 : \ > (n) & (1ULL << 7) ? 7 : \ > (n) & (1ULL << 6) ? 6 : \ > (n) & (1ULL << 5) ? 5 : \ > (n) & (1ULL << 4) ? 4 : \ > (n) & (1ULL << 3) ? 3 : \ > (n) & (1ULL << 2) ? 2 : \ > (n) & (1ULL << 1) ? 1 : \ > (n) & (1ULL << 0) ? 0 : \ > ____ilog2_NaN() \ > ) : \ > (sizeof(n) <= 4) ? \ > __ilog2_u32(n) : \ > __ilog2_u64(n) \ > ) > > static noinline __attribute__((const)) > int old_get_order(unsigned long size) > { > int order; > > size = (size - 1) >> (PAGE_SHIFT - 1); > order = -1; > do { > size >>= 1; > order++; > } while (size); > return order; > } > > static noinline __attribute__((const)) > int __get_order(unsigned long size) > { > int order; > size--; > size >>= PAGE_SHIFT; > #if BITS_PER_LONG == 32 > order = fls(size); > #else > order = fls64(size); > #endif > return order; > } > > #define get_order(n) \ > ( \ > __builtin_constant_p(n) ? ( \ > (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ > ((n < (1UL << PAGE_SHIFT)) ? 0 : \ > ilog2((n) - 1) - PAGE_SHIFT + 1) \ > ) : \ > __get_order(n) \ > ) > > #define order(N) \ > { (1UL << N) - 1, get_order((1UL << N) - 1) }, \ > { (1UL << N), get_order((1UL << N)) }, \ > { (1UL << N) + 1, get_order((1UL << N) + 1) } > > struct order { > unsigned long n, order; > }; > > static const struct order order_table[] = { > order(0), > order(1), > order(2), > order(3), > order(4), > order(5), > order(6), > order(7), > order(8), > order(9), > order(10), > order(11), > order(12), > order(13), > order(14), > order(15), > order(16), > order(17), > order(18), > order(19), > order(20), > order(21), > order(22), > order(23), > order(24), > order(25), > order(26), > order(27), > order(28), > order(29), > order(30), > order(31), > #if BITS_PER_LONG == 64 > order(32), > order(33), > order(34), > order(35), > #endif > { 0x2929 } > }; > > void check(int loop, unsigned long n) > { > unsigned long old, new; > > printf("[%2d]: %09lx | ", loop, n); > > old = old_get_order(n); > new = get_order(n); > > printf("%3ld, %3ld\n", old, new); > if (n != 0 && old != new) > abort(); > } > > int main(int argc, char **argv) > { > const struct order *p; > unsigned long n; > int loop; > > for (loop = 0; loop <= BITS_PER_LONG - 1; loop++) { > n = 1UL << loop; > check(loop, n - 1); > check(loop, n); > check(loop, n + 1); > } > > for (p = order_table; p->n != 0x2929; p++) { > unsigned long old, new; > > old = old_get_order(p->n); > new = p->order; > printf("%09lx\t%3ld, %3ld\n", p->n, old, new); > if (p->n != 0 && old != new) > abort(); > } > > return 0; > } > > Disassembling the x86_64 version of the above code shows: > > 0000000000400510 <old_get_order>: > 400510: 48 83 ef 01 sub $0x1,%rdi > 400514: b8 ff ff ff ff mov $0xffffffff,%eax > 400519: 48 c1 ef 0b shr $0xb,%rdi > 40051d: 0f 1f 00 nopl (%rax) > 400520: 83 c0 01 add $0x1,%eax > 400523: 48 d1 ef shr %rdi > 400526: 75 f8 jne 400520 <old_get_order+0x10> > 400528: f3 c3 repz retq > 40052a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) > > 0000000000400530 <__get_order>: > 400530: 48 83 ef 01 sub $0x1,%rdi > 400534: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax > 40053b: 48 c1 ef 0c shr $0xc,%rdi > 40053f: 48 0f bd c7 bsr %rdi,%rax > 400543: 83 c0 01 add $0x1,%eax > 400546: c3 retq > 400547: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) > 40054e: 00 00 > > As can be seen, the new __get_order() function is simpler than the > old_get_order() function. > > Signed-off-by: David Howells <dhowells@redhat.com> > Link: http://lkml.kernel.org/r/20120220223928.16199.29548.stgit@warthog.procyon.org.uk > Acked-by: Arnd Bergmann <arnd@arndb.de> > Signed-off-by: H. Peter Anvin <hpa@zytor.com> > --- > include/asm-generic/getorder.h | 40 ++++++++++++++++++++++++++++------------ > 1 files changed, 28 insertions(+), 12 deletions(-) > > diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h > index 76e9687..e0fb4bf 100644 > --- a/include/asm-generic/getorder.h > +++ b/include/asm-generic/getorder.h > @@ -4,6 +4,25 @@ > #ifndef __ASSEMBLY__ > > #include <linux/compiler.h> > +#include <linux/log2.h> > + > +/* > + * Runtime evaluation of get_order() > + */ > +static inline __attribute_const__ > +int __get_order(unsigned long size) > +{ > + int order; > + > + size--; > + size >>= PAGE_SHIFT; > +#if BITS_PER_LONG == 32 > + order = fls(size); > +#else > + order = fls64(size); > +#endif > + return order; > +} > > /** > * get_order - Determine the allocation order of a memory size > @@ -27,18 +46,15 @@ > * This function may be used to initialise variables with compile time > * evaluations of constants. > */ > -static inline __attribute_const__ int get_order(unsigned long size) > -{ > - int order; > - > - size = (size - 1) >> (PAGE_SHIFT - 1); > - order = -1; > - do { > - size >>= 1; > - order++; > - } while (size); > - return order; > -} > +#define get_order(n) \ > +( \ > + __builtin_constant_p(n) ? ( \ > + (n == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ > + ((n < (1UL << PAGE_SHIFT)) ? 0 : \ > + ilog2((n) - 1) - PAGE_SHIFT + 1) \ > + ) : \ > + __get_order(n) \ > +) > > #endif /* __ASSEMBLY__ */ > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ ÿôèº{.nÇ+·®+%Ëÿ±éݶ\x17¥wÿº{.nÇ+·¥{±þG«éÿ{ayº\x1dÊÚë,j\a¢f£¢·hïêÿêçz_è®\x03(éÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?¨èÚ&£ø§~á¶iOæ¬z·vØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?I¥ ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order() 2012-02-29 20:29 ` Paul Gortmaker (?) @ 2012-03-01 0:31 ` Stephen Rothwell 2012-03-01 21:02 ` H. Peter Anvin 2012-03-06 14:24 ` David Howells -1 siblings, 2 replies; 9+ messages in thread From: Stephen Rothwell @ 2012-03-01 0:31 UTC (permalink / raw) To: Paul Gortmaker Cc: mingo, hpa, linux-kernel, arnd, dhowells, tglx, linux-tip-commits, linux-next [-- Attachment #1: Type: text/plain, Size: 1199 bytes --] On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker <paul.gortmaker@windriver.com> wrote: > > On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells > <dhowells@redhat.com> wrote: > > Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e > > Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e > > Author: David Howells <dhowells@redhat.com> > > AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 > > Committer: H. Peter Anvin <hpa@zytor.com> > > CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 > > > > bitops: Optimise get_order() > > This is causing build failures on non-x86 in linux next according to git bisect. Presumably it needs to include linux/bitops.h (and see below). > > +static inline __attribute_const__ > > +int __get_order(unsigned long size) > > +{ > > + int order; > > + > > + size--; > > + size >>= PAGE_SHIFT; > > +#if BITS_PER_LONG == 32 > > + order = fls(size); > > +#else > > + order = fls64(size); > > +#endif linux/bitops.h has fls_long() that does this size test and calls the right thing. -- Cheers, Stephen Rothwell sfr@canb.auug.org.au [-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --] ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order() 2012-03-01 0:31 ` Stephen Rothwell @ 2012-03-01 21:02 ` H. Peter Anvin 2012-03-06 14:24 ` David Howells 1 sibling, 0 replies; 9+ messages in thread From: H. Peter Anvin @ 2012-03-01 21:02 UTC (permalink / raw) To: Stephen Rothwell Cc: Paul Gortmaker, mingo, linux-kernel, arnd, dhowells, tglx, linux-tip-commits, linux-next David, could you make a fix for this? -hpa On 02/29/2012 04:31 PM, Stephen Rothwell wrote: > On Wed, 29 Feb 2012 15:29:04 -0500 Paul Gortmaker<paul.gortmaker@windriver.com> wrote: >> >> On Mon, Feb 20, 2012 at 6:20 PM, tip-bot for David Howells >> <dhowells@redhat.com> wrote: >>> Commit-ID: d66acc39c7cee323733c8503b9de1821a56dff7e >>> Gitweb: http://git.kernel.org/tip/d66acc39c7cee323733c8503b9de1821a56dff7e >>> Author: David Howells<dhowells@redhat.com> >>> AuthorDate: Mon, 20 Feb 2012 22:39:29 +0000 >>> Committer: H. Peter Anvin<hpa@zytor.com> >>> CommitDate: Mon, 20 Feb 2012 14:47:02 -0800 >>> >>> bitops: Optimise get_order() >> >> This is causing build failures on non-x86 in linux next according to git bisect. > > Presumably it needs to include linux/bitops.h (and see below). > >>> +static inline __attribute_const__ >>> +int __get_order(unsigned long size) >>> +{ >>> + int order; >>> + >>> + size--; >>> + size>>= PAGE_SHIFT; >>> +#if BITS_PER_LONG == 32 >>> + order = fls(size); >>> +#else >>> + order = fls64(size); >>> +#endif > > linux/bitops.h has fls_long() that does this size test and calls the right thing. > ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [tip:x86/asm] bitops: Optimise get_order() 2012-03-01 0:31 ` Stephen Rothwell 2012-03-01 21:02 ` H. Peter Anvin @ 2012-03-06 14:24 ` David Howells 1 sibling, 0 replies; 9+ messages in thread From: David Howells @ 2012-03-06 14:24 UTC (permalink / raw) To: Stephen Rothwell Cc: dhowells, Paul Gortmaker, mingo, hpa, linux-kernel, arnd, tglx, linux-tip-commits, linux-next Stephen Rothwell <sfr@canb.auug.org.au> wrote: > Presumably it needs to include linux/bitops.h (and see below). That won't help. It already includes that via linux/log2.h. The error chain looks suspiciously like a circular dependency has occurred. David ^ permalink raw reply [flat|nested] 9+ messages in thread
* [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case 2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells 2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells @ 2012-02-20 23:19 ` tip-bot for David Howells 1 sibling, 0 replies; 9+ messages in thread From: tip-bot for David Howells @ 2012-02-20 23:19 UTC (permalink / raw) To: linux-tip-commits; +Cc: arnd, linux-kernel, hpa, mingo, dhowells, tglx Commit-ID: e0891a9816316b5e05fd5b0453ffe9fd6a56f489 Gitweb: http://git.kernel.org/tip/e0891a9816316b5e05fd5b0453ffe9fd6a56f489 Author: David Howells <dhowells@redhat.com> AuthorDate: Mon, 20 Feb 2012 22:39:18 +0000 Committer: H. Peter Anvin <hpa@zytor.com> CommitDate: Mon, 20 Feb 2012 14:46:55 -0800 bitops: Adjust the comment on get_order() to describe the size==0 case Adjust the comment on get_order() to note that the result of passing a size of 0 results in an undefined value. Signed-off-by: David Howells <dhowells@redhat.com> Link: http://lkml.kernel.org/r/20120220223917.16199.9416.stgit@warthog.procyon.org.uk Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: H. Peter Anvin <hpa@zytor.com> --- include/asm-generic/getorder.h | 23 ++++++++++++++++++++++- 1 files changed, 22 insertions(+), 1 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index 67e7245..76e9687 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -5,7 +5,28 @@ #include <linux/compiler.h> -/* Pure 2^n version of get_order */ +/** + * get_order - Determine the allocation order of a memory size + * @size: The size for which to get the order + * + * Determine the allocation order of a particular sized block of memory. This + * is on a logarithmic scale, where: + * + * 0 -> 2^0 * PAGE_SIZE and below + * 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1 + * 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1 + * 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1 + * 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1 + * ... + * + * The order returned is used to find the smallest allocation granule required + * to hold an object of the specified size. + * + * The result is undefined if the size is 0. + * + * This function may be used to initialise variables with compile time + * evaluations of constants. + */ static inline __attribute_const__ int get_order(unsigned long size) { int order; ^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2012-03-06 14:25 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-02-20 22:39 [PATCH 1/2] Adjust the comment on get_order() to describe the size==0 case David Howells 2012-02-20 22:39 ` [PATCH 2/2] Optimise get_order() David Howells 2012-02-20 23:20 ` [tip:x86/asm] bitops: " tip-bot for David Howells 2012-02-29 20:29 ` Paul Gortmaker 2012-02-29 20:29 ` Paul Gortmaker 2012-03-01 0:31 ` Stephen Rothwell 2012-03-01 21:02 ` H. Peter Anvin 2012-03-06 14:24 ` David Howells 2012-02-20 23:19 ` [tip:x86/asm] bitops: Adjust the comment on get_order() to describe the size==0 case tip-bot for David Howells
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.