From: David Mosberger <davidm@napali.hpl.hp.com>
To: linux-ia64@vger.kernel.org
Subject: Re: [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random problem on 2.6.12-rc1)]
Date: Sat, 09 Apr 2005 06:00:51 +0000 [thread overview]
Message-ID: <16983.28563.65067.867958@napali.hpl.hp.com> (raw)
In-Reply-To: <20050408103324.6c5231df.akpm@osdl.org>
>>>>> On Fri, 8 Apr 2005 22:16:53 -0700, Grant Grundler <iod00d@hp.com> said:
Grant> Results on my 1.5hz Madison are slightly different than what was
Grant> previously posted:
Grant> grundler@iota:/usr/src/userspace$ gcc-3.4 -O2 test_fls.c
Grant> grundler@iota:/usr/src/userspace$ ./a.out
Grant> done with correctness test
Grant> overhead: 5.342 ns
Grant> generic: 8.013 ns
Grant> womack: 8.680 ns
Grant> arch: 8.681 ns
Grant> popcount: 7.345 ns
Grant> ia64_fls: 7.345 ns
Grant> popcount64: 8.680 ns
Yes, the loop is very sensitive to small changes, so I'm not surprised
the timing changes. I tried a different approach: summing up the
return values and printing the final value. That's more portable and
almost guarantees that the loop won't get eliminated. With that, I
get:
overhead: 4.678 ns (sum=0)
generic: 9.363 ns (sumíac3f77)
womack: 10.045 ns (sumå1e2548)
popcount: 8.718 ns (sumô331f94)
arch: 10.020 ns (sumå4a26b6)
clz: 10.029 ns (sumå4d1e96)
ia64_fls: 8.017 ns (sum=0xd319c7ee)
popcount64: 10.033 ns (sum=0xb5879d84)
whereas before it was:
overhead: 4.677 ns
generic: 8.687 ns
womack: 9.358 ns
popcount: 6.680 ns
arch: 9.356 ns
clz: 9.353 ns
ia64_fls: 8.004 ns
popcount64: 10.004 ns
Fortunately, the changes don't affect the conclusions: ia64_fls() is
the right choice if you need a full 64-bit fls and popcount is best
for the Linux/portable fls().
(Note: I added a "clz" variant which builds on __builtin_clz(); it
turns out this ends up generating the same code as the "arch"
variant.)
--david
-------------------
/*
* Quick hack to verify ia64 fls() was (NOT) correct.
* Use as framework for testing other arches.
*
* Copyright (C) 2005 Grant Grundler (grundler at parisc-linux.org)
*
* Copies code from:
* Copyright (C) 1998-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* testfls.c may be redistributed under GPL 2.0.
*/
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#ifdef __INTEL_COMPILER
# include <ia64intrin.h>
# define ia64_getf_exp(x) __getf_exp(x)
# define popcount(x) _m64_popcnt(x)
#else
# define ia64_getf_exp(x) \
({ \
long ia64_intri_res; \
\
asm ("getf.exp %0=%1" : "=r"(ia64_intri_res) : "f"(x)); \
\
ia64_intri_res; \
})
# define ia64_popcnt(x) \
({ \
unsigned long ia64_intri_res; \
asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \
\
ia64_intri_res; \
})
# define popcount(x) __builtin_popcountl(x)
#endif
static inline unsigned long
ia64_fls (unsigned long x)
{
long double d = x;
long exp;
exp = ia64_getf_exp(d);
return exp - 0xffff;
}
static inline int
arch_fls (int x)
{
if (!x)
return 0;
return ia64_fls((unsigned int) x) + 1;
}
/*
* fls: find last bit set.
*/
static __inline__ int generic_fls(int x)
{
int r = 32;
if (!x)
return 0;
if (!(x & 0xffff0000u)) {
x <<= 16;
r -= 16;
}
if (!(x & 0xff000000u)) {
x <<= 8;
r -= 8;
}
if (!(x & 0xf0000000u)) {
x <<= 4;
r -= 4;
}
if (!(x & 0xc0000000u)) {
x <<= 2;
r -= 2;
}
if (!(x & 0x80000000u)) {
x <<= 1;
r -= 1;
}
return r;
}
static __inline__ int womack_fls(int x)
{
unsigned int t = x;
int r = 1;
if (!t)
return 0;
if (t & 0xffff0000u) {
t >>= 16;
r += 16;
}
if (t & 0xff00) {
t >>= 8;
r += 8;
}
if (t & 0xf0u) {
t >>= 4;
r += 4;
}
/*
* This is based on an article by Paul Womack posted to
* comp.arch on May 15, 2000.
*
* Last 4 bits are handled by a lookup table. Each
* table-entry occupies only 2 bits, so we can encode the
* table in 16*2 = 32 bits. The table value is constructed
* like this:
*
* 0000 = don't care (not possible)
* 0001 = 1
* 0010 = 2
* 0011 = 2
* 0100 = 3
* 0101 = 3
* 0110 = 3
* 0111 = 3
* 1000 = 4
* 1001 = 4
* 1010 = 4
* 1011 = 4
* 1100 = 4
* 1101 = 4
* 1110 = 4
* 1111 = 4
*
* to make it fit in 2 bits, we subtract 1 before encoding, so
* in binary:
*
* = 0b11111111111111111010101001010000
* 3 3 3 3 3 3 3 3 2 2 2 2 1 1 0 0
* hex (thanks, bc!)
* = 0xffffaa50
*/
return r + ((0xffffaa50u >> (2 * t)) & 0x3);
}
static __inline__ int
popcount_fls(int t)
{
unsigned long x = t & 0xffffffffu;
if (!x)
return 0;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return popcount(x);
}
static __inline__ unsigned long
popcount_fls64(unsigned long x)
{
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x |= x >> 32;
return popcount(x) - 1;
}
static __inline__ int
clz_fls (int x)
{
if (!x)
return 0;
return 32 - __builtin_clz(x);
}
int testval[] = { -2, -1, 0, 6, 10 };
static inline void
check (int val)
{
if (arch_fls(val) != generic_fls(val))
printf ("arch_fls(%08x) %6d generic_fls(%08x) %6d\n",
val, arch_fls(val), val, generic_fls(val));
if (womack_fls(val) != generic_fls(val))
printf ("arch_fls(%08x) %6d generic_fls(%08x) %6d\n",
val, womack_fls(val), val, generic_fls(val));
if (popcount_fls(val) != generic_fls(val))
printf ("popcount_fls(%08x) %6d generic_fls(%08x) %6d\n",
val, popcount_fls(val), val, generic_fls(val));
if (clz_fls(val) != generic_fls(val))
printf ("clz_fls(%08x) %6d generic_fls(%08x) %6d\n",
val, clz_fls(val), val, generic_fls(val));
}
static inline void
check64 (unsigned long val)
{
if (!val)
return;
if (popcount_fls64(val) != ia64_fls(val))
printf ("popcount_fls64(%016lx) %6ld ia64_fls(%016lx) %6ld\n",
val, popcount_fls64(val), val, ia64_fls(val));
}
static int
empty (int x)
{
return 0;
}
static double
measure (const char *label, int (*func)(int), double offset)
{
struct timeval start, stop;
long i, count = 1000000, sum = 0;
double t;
while (1) {
gettimeofday(&start, NULL);
for (i = 0; i < count; ++i)
(*func)(i | (i << 16));
gettimeofday(&stop, NULL);
t = ((stop.tv_sec + 1e-6*stop.tv_usec)
- (start.tv_sec + 1e-6*start.tv_usec));
if (t >= 1.0)
break;
if (t <= 0.0)
t = 1e-3;
count = (1.1 / t) * count;
}
t = (t / count) * 1e9 - offset;
printf ("%10s: %9.3f ns (sum=%lx)\n", label, t, sum);
return t;
}
static double
measure64 (const char *label, unsigned long (*func)(unsigned long),
double offset)
{
struct timeval start, stop;
long i, count = 1000000, sum = 0;
double t;
while (1) {
gettimeofday(&start, NULL);
for (i = 0; i < count; ++i)
(*func)(i | (i << 16));
gettimeofday(&stop, NULL);
t = ((stop.tv_sec + 1e-6*stop.tv_usec)
- (start.tv_sec + 1e-6*start.tv_usec));
if (t >= 1.0)
break;
if (t <= 0.0)
t = 1e-3;
count = (1.1 / t) * count;
}
t = (t / count) * 1e9 - offset;
printf ("%10s: %9.3f ns (sum=0x%lx)\n", label, t, sum);
return t;
}
int
main (int argc, char **argv)
{
double overhead;
int j;
for (j = 0; j < sizeof(testval)/sizeof(int); j++) {
check(testval[j]);
check64(testval[j]);
}
for (j = 0; j < 32; ++j) {
check(1 << j);
check((1 << j) - 1);
check((1 << j) + 1);
}
for (j = 0; j < 64; ++j) {
check(1UL << j);
check((1UL << j) - 1);
check((1UL << j) + 1);
}
printf ("done with correctness test\n");
overhead = measure("overhead", empty, 0.0);
measure("generic", generic_fls, overhead);
measure("womack", womack_fls, overhead);
measure("popcount", popcount_fls, overhead);
measure("arch", arch_fls, overhead);
measure("clz", clz_fls, overhead);
measure64("ia64_fls", ia64_fls, overhead);
measure64("popcount64", popcount_fls64, overhead);
return 0;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2005-04-09 6:00 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-04-08 17:33 [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random Andrew Morton
2005-04-08 17:48 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random problem on 2.6.12-rc1)] Matt Mackall
2005-04-08 18:02 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random Andrew Morton
2005-04-08 19:05 ` David Mosberger
2005-04-08 20:46 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random problem on 2.6.12-rc1)] David Mosberger
2005-04-08 22:49 ` Matt Mackall
2005-04-08 23:01 ` David Mosberger
2005-04-08 23:02 ` Luck, Tony
2005-04-08 23:15 ` Matt Mackall
2005-04-08 23:17 ` Matt Mackall
2005-04-08 23:19 ` David Mosberger
2005-04-08 23:49 ` David Mosberger
2005-04-09 0:21 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random David Mosberger
2005-04-09 0:28 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random problem on 2.6.12-rc1)] Matt Mackall
2005-04-09 0:34 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random Arthur Kepner
2005-04-09 0:46 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random problem on 2.6.12-rc1)] Grant Grundler
2005-04-09 3:00 ` Grant Grundler
2005-04-09 4:05 ` David Mosberger
2005-04-09 4:32 ` David Mosberger
2005-04-09 5:09 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random David Mosberger
2005-04-09 5:16 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random problem on 2.6.12-rc1)] Grant Grundler
2005-04-09 6:00 ` David Mosberger [this message]
2005-04-22 3:50 ` [mpm@selenic.com: Re: buggy ia64_fls() ? (was Re: /dev/random Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=16983.28563.65067.867958@napali.hpl.hp.com \
--to=davidm@napali.hpl.hp.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox