From: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
To: falk.hueffner@student.uni-tuebingen.de, torvalds@transmeta.com,
linux-kernel@vger.kernel.org
Subject: [PATCH 2.5.52] Optimize include/asm-ARCH/page.h:get_order() (take 1.0)
Date: Tue, 17 Dec 2002 13:25:34 -0800 [thread overview]
Message-ID: <E18OPDa-0006Zj-00@milikk> (raw)
Hi all
This patch is a reorganization and optimization of the get_order()
function.
Currently each architecture defines its version in
asm-${ARCH}/page.h, being most of them the same function. I have
unified the implementation in asm-generic/page.h, as
generic_get_order(). Then each arch can implement it's own version
or use the generic one [as the patch stands now, every
architecture that had the generic version uses
generic_get_oder()].
The optimization is done in the way the generic_get_order() is
implemented; currently it is a loop - however, that can be highly
optimized by using fls() [available on each architecture or as
generic_fls() where not optimized].
On my tests in a tight loop doing repeated get_order() calls (see
included test program), I got an aprox average run time of 6m24s
for the unoptimized version (the old one, executing "time ./goo -v
speed-new") and 2m32s for the optimized version (executing "time
./goo -v speed-old"). The test machine is a 2xP3 933MHhz 1.5GB.
Even using generic_fls() on ia32 yields better results - my guess
is it caused for it being a fixed-path function instead of a for()
loop.
I was only able to test it and try it in ia32, so I don't know if
I broke anything else in other archs; reports welcome.
CAVEATS:
- the #include <asm-generic/page.h> ... I don't know if it is the
best way to do it. Any ideas?
- Some arches still define fls() as the generic_fls(), instead of
using bit-searching ASM instructions - I lack the knowledge to
fix it, though.
Changelog:
- Pull up to 2.5.52
- Fixes some obvious, blatant and stupid errors I did in the
first release.
- Increase the speed up.
Test program (patch follows):
compile with 'cc -Iinclude/ -Wall -g -O2 goo.c -o goo' from the
root of the patched linux kernel tree.
#define __KERNEL__
#define __ASSEMBLY__
#include "asm-i386/bitops.h"
#include "asm-i386/page.h"
#include "asm-generic/page.h"
#include <stdio.h>
#include <getopt.h>
#include <stdlib.h>
/* Old, unoptimized version */
static __inline__
int get_order_0 (unsigned long size)
{
int order;
size = (size-1) >> (PAGE_SHIFT-1);
order = -1;
do {
size >>= 1;
order++;
} while (size);
return order;
}
unsigned long verbose_step = 0;
static __inline__
void progress (unsigned long cnt)
{
if (verbose_step && (cnt % verbose_step) == 0)
printf ("Going through %lu\n", cnt);
}
void help (FILE *f)
{
fprintf (f,
"Usage: goo [-b BOTTOM] [-t TOP] [-s STEP] [-v VSTEP] [-h]\n"
" {verify,speed-new,speed-old}\n"
"\n"
"-b BOTTOM where to start counting [0]\n"
"-t TOP where to stop counting [~0UL]\n"
"-s STEP count increment [1]\n"
"-v VSTEP Every VSTEP report the count [0]\n"
"\n"
"verify Verify that the old and new implementations behave the same\n"
" from BOTTOM to TOP with STEP.\n"
"\n"
"speed-new Call the optimized get_order() for all the numbers from\n"
" BOTTOM to TOP with STEP.\n"
"\n"
"speed-old Call the original get_order() for all the numbers from\n"
" BOTTOM to TOP with STEP.\n"
"\n"
"It is nice to have a wide range, so that the timing is better; use -v\n"
"if you want reporting because it feels nice. The default range covers\n"
"all that an unsigned long can hold.\n"
);
}
/* Test driver
**
** We just count from 0 to ~0UL; in verify mode, we verify that
** _every_ number get's the same order with the old and new
** versions. In the speed mode we just run in a tight loop so that
** it can be rawly measured how fast is the new version (speed-new)
** against the old version (speed-old). time(1) is your friend.
*/
int main (int argc, char **argv)
{
const char *mode;
int opterr, c;
unsigned long bottom = 0,
top = ~0UL,
step = 1,
dummy = 0,
cnt;
opterr = 0;
while ((c = getopt (argc, argv, "b:t:s:v:h")) != -1)
switch (c) {
case 'b':
bottom = strtoul (optarg, NULL, 10);
break;
case 't':
top = strtoul (optarg, NULL, 10);
break;
case 's':
step = strtoul (optarg, NULL, 10);
break;
case 'v':
verbose_step = strtoul (optarg, NULL, 10);
break;
case 'h':
help (stdout);
exit (0);
break;
case '?':
default:
fprintf (stderr, "Unknown option %s\n", argv[optind]);
help (stderr);
exit (1);
}
if (argc < 2) {
help (stderr);
return 1;
}
mode = argv[optind];
if (!strcmp (mode, "verify")) {
for (cnt = bottom; cnt < top; cnt += step) {
if (get_order_0 (cnt) != generic_get_order (cnt))
printf ("error: %lu - different results\n", cnt);
progress (cnt);
}
}
else if (!strcmp (mode, "speed-old")) {
for (cnt = bottom; cnt < top; cnt += step) {
dummy += get_order_0 (cnt);
progress (cnt);
}
}
else if (!strcmp (mode, "speed-new")) {
for (cnt = bottom; cnt < top; cnt += step) {
dummy += generic_get_order (cnt);
progress (cnt);
}
}
else {
fprintf (stderr, "Unknown mode\n");
help (stderr);
exit (1);
}
printf ("accumulated dummy = %lu\n", dummy);
return 0;
}
/*
** Local Variables:
** compile-command: "cc -Iinclude/ -Wall -g -O2 goo.c -o goo"
** End:
*/
diff -u include/asm-alpha/page.h:1.1.1.1 include/asm-alpha/page.h:1.1.1.1.6.1
--- include/asm-alpha/page.h:1.1.1.1 Wed Dec 11 11:06:01 2002
+++ include/asm-alpha/page.h Mon Dec 16 20:36:48 2002
@@ -67,18 +67,17 @@
#define PAGE_BUG(page) BUG()
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order(unsigned long size)
-{
- int order;
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
+
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
#endif /* !__ASSEMBLY__ */
diff -u include/asm-arm/page.h:1.1.1.1 include/asm-arm/page.h:1.1.1.1.6.1
--- include/asm-arm/page.h:1.1.1.1 Wed Dec 11 11:06:01 2002
+++ include/asm-arm/page.h Mon Dec 16 20:36:48 2002
@@ -175,18 +175,16 @@
#endif
-/* Pure 2^n version of get_order */
-static inline int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
#include <asm/memory.h>
diff -u /dev/null include/asm-generic/page.h:1.1.2.1
--- /dev/null Tue Dec 17 13:20:35 2002
+++ include/asm-generic/page.h Mon Dec 16 20:36:48 2002
@@ -0,0 +1,20 @@
+#ifndef _GENERIC_PAGE_H
+#define _GENERIC_PAGE_H
+
+#include <asm/bitops.h>
+
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+static __inline__
+int generic_get_order (unsigned long s)
+{
+ s = (s - 1) >> PAGE_SHIFT;
+ if (s == 0)
+ return 0;
+ return fls (s);
+}
+
+#endif _GENERIC_PAGE_H
+
diff -u include/asm-i386/bitops.h:1.1.1.3 include/asm-i386/bitops.h:1.1.1.3.2.1
--- include/asm-i386/bitops.h:1.1.1.3 Wed Dec 11 11:13:36 2002
+++ include/asm-i386/bitops.h Mon Dec 16 20:36:48 2002
@@ -416,11 +416,19 @@
return word;
}
-/*
- * fls: find last bit set.
+/**
+ * __fls - find last bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
*/
-
-#define fls(x) generic_fls(x)
+static __inline__ unsigned long __fls(unsigned long word)
+{
+ __asm__("bsrl %1,%0"
+ :"=r" (word)
+ :"rm" (word));
+ return word;
+}
#ifdef __KERNEL__
@@ -456,6 +464,23 @@
int r;
__asm__("bsfl %1,%0\n\t"
+ "jnz 1f\n\t"
+ "movl $-1,%0\n"
+ "1:" : "=r" (r) : "g" (x));
+ return r+1;
+}
+
+/**
+ * fls - find last bit set
+ * @x: the word to search
+ *
+ * Check out comments for ffs()
+ */
+static __inline__ int fls(int x)
+{
+ int r;
+
+ __asm__("bsrl %1,%0\n\t"
"jnz 1f\n\t"
"movl $-1,%0\n"
"1:" : "=r" (r) : "g" (x));
diff -u include/asm-i386/page.h:1.1.1.1 include/asm-i386/page.h:1.1.1.1.6.1
--- include/asm-i386/page.h:1.1.1.1 Wed Dec 11 11:06:01 2002
+++ include/asm-i386/page.h Mon Dec 16 20:36:48 2002
@@ -120,18 +120,17 @@
BUG(); \
} while (0)
-/* Pure 2^n version of get_order */
-static __inline__ int get_order(unsigned long size)
-{
- int order;
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
+
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
#endif /* __ASSEMBLY__ */
diff -u include/asm-m68k/page.h:1.1.1.2 include/asm-m68k/page.h:1.1.1.2.2.1
--- include/asm-m68k/page.h:1.1.1.2 Wed Dec 11 11:07:56 2002
+++ include/asm-m68k/page.h Mon Dec 16 20:36:48 2002
@@ -100,18 +100,17 @@
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order(unsigned long size)
-{
- int order;
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
+
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
#endif /* !__ASSEMBLY__ */
diff -u include/asm-m68knommu/page.h:1.1.2.1 include/asm-m68knommu/page.h:1.1.2.1.2.1
--- include/asm-m68knommu/page.h:1.1.2.1 Wed Dec 11 11:07:56 2002
+++ include/asm-m68knommu/page.h Mon Dec 16 20:36:48 2002
@@ -51,19 +51,18 @@
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
+
extern unsigned long memory_start;
extern unsigned long memory_end;
diff -u include/asm-mips/page.h:1.1.1.1 include/asm-mips/page.h:1.1.1.1.6.1
--- include/asm-mips/page.h:1.1.1.1 Wed Dec 11 11:06:01 2002
+++ include/asm-mips/page.h Mon Dec 16 20:36:48 2002
@@ -49,18 +49,16 @@
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
#endif /* _LANGUAGE_ASSEMBLY */
diff -u include/asm-parisc/page.h:1.1.1.1 include/asm-parisc/page.h:1.1.1.1.6.1
--- include/asm-parisc/page.h:1.1.1.1 Wed Dec 11 11:06:00 2002
+++ include/asm-parisc/page.h Mon Dec 16 20:36:48 2002
@@ -53,19 +53,18 @@
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
+
#ifdef __LP64__
#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */
diff -u include/asm-ppc64/page.h:1.1.1.2 include/asm-ppc64/page.h:1.1.1.2.2.1
--- include/asm-ppc64/page.h:1.1.1.2 Wed Dec 11 11:13:36 2002
+++ include/asm-ppc64/page.h Mon Dec 16 20:36:48 2002
@@ -119,19 +119,18 @@
#define PAGE_BUG(page) do { BUG(); } while (0)
-/* Pure 2^n version of get_order */
-static inline int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
+
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
diff -u include/asm-s390/page.h:1.1.1.2 include/asm-s390/page.h:1.1.1.2.2.1
--- include/asm-s390/page.h:1.1.1.2 Mon Dec 16 18:44:31 2002
+++ include/asm-s390/page.h Mon Dec 16 20:36:48 2002
@@ -71,18 +71,16 @@
BUG(); \
} while (0)
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
/*
diff -u include/asm-s390x/page.h:1.1.1.1 include/asm-s390x/page.h:1.1.1.1.6.1
--- include/asm-s390x/page.h:1.1.1.1 Wed Dec 11 11:06:00 2002
+++ include/asm-s390x/page.h Mon Dec 16 20:36:48 2002
@@ -69,18 +69,16 @@
BUG(); \
} while (0)
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
/*
diff -u include/asm-sh/page.h:1.1.1.1 include/asm-sh/page.h:1.1.1.1.6.1
--- include/asm-sh/page.h:1.1.1.1 Wed Dec 11 11:06:01 2002
+++ include/asm-sh/page.h Mon Dec 16 20:36:48 2002
@@ -102,19 +102,18 @@
BUG(); \
} while (0)
-/* Pure 2^n version of get_order */
-static __inline__ int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
+
#endif
diff -u include/asm-sparc/page.h:1.1.1.2 include/asm-sparc/page.h:1.1.1.2.2.1
--- include/asm-sparc/page.h:1.1.1.2 Wed Dec 11 11:13:36 2002
+++ include/asm-sparc/page.h Mon Dec 16 20:36:48 2002
@@ -147,19 +147,18 @@
#define TASK_UNMAPPED_BASE BTFIXUP_SETHI(sparc_unmapped_base)
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
+
#else /* !(__ASSEMBLY__) */
diff -u include/asm-sparc64/page.h:1.1.1.1 include/asm-sparc64/page.h:1.1.1.1.6.1
--- include/asm-sparc64/page.h:1.1.1.1 Wed Dec 11 11:06:01 2002
+++ include/asm-sparc64/page.h Mon Dec 16 20:36:48 2002
@@ -161,18 +161,16 @@
extern struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS];
-/* Pure 2^n version of get_order */
-static __inline__ int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
#endif /* !(__ASSEMBLY__) */
diff -u include/asm-v850/page.h:1.1.2.1 include/asm-v850/page.h:1.1.2.1.2.1
--- include/asm-v850/page.h:1.1.2.1 Wed Dec 11 11:07:56 2002
+++ include/asm-v850/page.h Mon Dec 16 20:36:48 2002
@@ -98,19 +98,18 @@
#define BUG() __bug()
#define PAGE_BUG(page) __bug()
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order (unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
+
#endif /* !__ASSEMBLY__ */
diff -u include/asm-x86_64/page.h:1.1.1.1 include/asm-x86_64/page.h:1.1.1.1.6.1
--- include/asm-x86_64/page.h:1.1.1.1 Wed Dec 11 11:06:01 2002
+++ include/asm-x86_64/page.h Mon Dec 16 20:36:48 2002
@@ -80,19 +80,18 @@
#define PAGE_BUG(page) BUG()
void out_of_line_bug(void);
-/* Pure 2^n version of get_order */
-extern __inline__ int get_order(unsigned long size)
-{
- int order;
+/* Return 'n' in how many 2^n pages are needed to store s bytes.
+** (n == 0 for s == 0)
+*/
+
+#include <asm-generic/page.h>
- size = (size-1) >> (PAGE_SHIFT-1);
- order = -1;
- do {
- size >>= 1;
- order++;
- } while (size);
- return order;
+static __inline__
+int get_order (unsigned long s)
+{
+ return generic_get_order (s);
}
+
#endif /* __ASSEMBLY__ */
--
Inaky Perez-Gonzalez -- Not speaking for Intel - opinions are my own [or my fault]
reply other threads:[~2002-12-17 21:23 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=E18OPDa-0006Zj-00@milikk \
--to=inaky.perez-gonzalez@intel.com \
--cc=falk.hueffner@student.uni-tuebingen.de \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@transmeta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.