All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: P@draigBrady.com
Cc: torvalds@osdl.org, matthew@wil.cx, arjan@infradead.org,
	linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org,
	dhowells@redhat.com
Subject: Re: [PATCH 1/3] X86: Optimise fls(), ffs() and fls64()
Date: Wed, 14 Apr 2010 14:13:35 +0100	[thread overview]
Message-ID: <18695.1271250815@redhat.com> (raw)
In-Reply-To: <4BACCB4E.7010108@draigBrady.com>

Pádraig Brady <P@draigBrady.com> wrote:

> Benchmarks would be useful for this patch set.

Okay.

Using the attached test program:

	warthog>time ./get_order 
	real    1m37.191s
	user    1m36.313s
	sys     0m0.861s
	warthog>time ./get_order x
	real    0m16.892s
	user    0m16.586s
	sys     0m0.287s
	warthog>time ./get_order x x
	real    0m7.731s
	user    0m7.727s
	sys     0m0.002s

Using the current upstream fls64() as a basis for an inlined get_order() [the
second result above] is much faster than using the current out-of-line
loop-based get_order() [the first result above].

Using my optimised inline fls64()-based get_order() [the third result above]
is even faster still.

I ran the above on my Core2 desktop box running x86_64 Fedora 12.

Also note that I compiled the test program with -O3, so I had to do things to
prevent gcc from optimising the call to fls64() or get_order() away, such as
adding up the results and sticking them in a global variable, and not having
too few values passed to get_order(), lest gcc calculate them in advance.

So it would be useful to decide if we can optimise fls() and fls64() for
x86_64.  Certainly it would be useful to replace the out-of-line get_order()
for x86_64.

David
---
#include <stdlib.h>
#include <stdio.h>

#ifndef __x86_64__
#error
#endif

#define BITS_PER_LONG 64

#define PAGE_SHIFT 12

typedef unsigned long long __u64, u64;
typedef unsigned int __u32, u32;
#define noinline	__attribute__((noinline))

/*
 * Branch-free fls64() candidate: returns one plus whatever BSRQ leaves in
 * its destination register for the 64-bit input @x.
 *
 * bitpos is preloaded with -1 and handed to the asm as a read-write ("+r")
 * operand, so the register holds -1 on entry to the instruction.
 *
 * NOTE(review): for x == 0 this presumably relies on BSRQ leaving the
 * destination register unmodified, so that the result is -1 + 1 == 0 --
 * confirm against the CPU vendors' documentation for BSR with a zero source.
 */
static __always_inline int fls64(__u64 x)
{
	long bitpos = -1;

	asm("bsrq %1,%0"
	    : "+r" (bitpos)
	    : "rm" (x));
	return bitpos + 1;
}

/*
 * Run BSR on @word and return what the instruction writes to its destination.
 *
 * There is no zero check here; the caller in this file, old_fls64(), tests
 * for x == 0 before calling.  The output operand is write-only ("=r"), so
 * nothing is preloaded into the destination register.
 */
static inline unsigned long __fls(unsigned long word)
{
	asm("bsr %1,%0"
	    : "=r" (word)
	    : "rm" (word));
	return word;
}
/*
 * fls64() built on __fls() with an explicit zero test: yields 0 for a zero
 * input, otherwise __fls(x) + 1.
 */
static __always_inline int old_fls64(__u64 x)
{
	/* __fls() is only ever reached with a non-zero argument. */
	return x ? __fls(x) + 1 : 0;
}

/*
 * Out-of-line, loop-based get_order(): counts how many single-bit right
 * shifts are needed to reduce (size - 1) >> PAGE_SHIFT to zero.
 */
static noinline // __attribute__((const))
int old_get_order(unsigned long size)
{
	unsigned long pages = (size - 1) >> PAGE_SHIFT;
	int order = 0;

	while (pages) {
		pages >>= 1;
		order++;
	}
	return order;
}

/*
 * Inline get_order() variant built on old_fls64(): applies old_fls64() to
 * (size - 1) >> PAGE_SHIFT.
 */
static inline __attribute__((const))
int __get_order_old_fls64(unsigned long size)
{
	return old_fls64((size - 1) >> PAGE_SHIFT);
}

/*
 * Inline get_order() variant built on fls64(): applies fls64() to
 * (size - 1) >> PAGE_SHIFT.
 */
static inline __attribute__((const))
int __get_order(unsigned long size)
{
	return fls64((size - 1) >> PAGE_SHIFT);
}

/* Wrapper macro: expands to a parenthesised call of __get_order_old_fls64(n). */
#define get_order_old_fls64(n)	(__get_order_old_fls64(n))

/* Wrapper macro: expands to a parenthesised call of __get_order(n). */
#define get_order(n)	(__get_order(n))

/*
 * Sink for the benchmark result: main() stores the accumulated total here so
 * the compiler cannot discard the benchmarked calls as dead code.
 */
unsigned long prevent_optimise_out;

static noinline unsigned long test_old_get_order(void)
{
	unsigned long n, total = 0;
	long rep, loop;

	for (rep = 1000000; rep > 0; rep--) {
		for (loop = 0; loop <= 16384; loop += 4) {
			n = 1UL << loop;
			total += old_get_order(n);
		}
	}
	return total;
}

static noinline unsigned long test_get_order_old_fls64(void)
{
	unsigned long n, total = 0;
	long rep, loop;

	for (rep = 1000000; rep > 0; rep--) {
		for (loop = 0; loop <= 16384; loop += 4) {
			n = 1UL << loop;
			total += get_order_old_fls64(n);
		}
	}
	return total;
}

static noinline unsigned long test_get_order(void)
{
	unsigned long n, total = 0;
	long rep, loop;

	for (rep = 1000000; rep > 0; rep--) {
		for (loop = 0; loop <= 16384; loop += 4) {
			n = 1UL << loop;
			total += get_order(n);
		}
	}
	return total;
}

/*
 * Select the benchmark by argument count:
 *   no arguments   - test_old_get_order()
 *   one argument   - test_get_order_old_fls64()
 *   anything else  - test_get_order()
 * The argument values themselves are never examined.
 */
int main(int argc, char **argv)
{
	unsigned long total;

	if (argc == 1) {
		total = test_old_get_order();
	} else if (argc == 2) {
		total = test_get_order_old_fls64();
	} else {
		total = test_get_order();
	}

	/* Publish the sum through a global so the work cannot be elided. */
	prevent_optimise_out = total;
	return 0;
}

  parent reply	other threads:[~2010-04-14 13:13 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-26 14:42 [PATCH 1/3] X86: Optimise fls(), ffs() and fls64() David Howells
2010-03-26 14:42 ` [PATCH 2/3] Adjust the comment on get_order() to describe the size==0 case David Howells
2010-03-26 14:42 ` [PATCH 3/3] Optimise get_order() David Howells
2010-03-26 17:23 ` [PATCH 1/3] X86: Optimise fls(), ffs() and fls64() Linus Torvalds
2010-03-26 17:37   ` Scott Lurndal
2010-03-26 17:42     ` Linus Torvalds
2010-04-06 13:57       ` Jamie Lokier
2010-04-06 14:40         ` Linus Torvalds
2010-03-26 17:42   ` David Howells
2010-03-26 17:45     ` Linus Torvalds
2010-03-26 17:58       ` Ralf Baechle
2010-03-26 18:03         ` Linus Torvalds
2010-03-26 18:16           ` Matthew Wilcox
2010-04-06 13:30           ` Matthew Wilcox
2010-04-14 11:49             ` David Howells
2010-04-14 14:30               ` Avi Kivity
2010-04-15  8:48                 ` David Howells
2010-04-15  8:49                   ` Avi Kivity
2010-04-15 11:41                     ` Jamie Lokier
2010-03-26 17:52     ` Matthew Wilcox
     [not found] ` <4BACCB4E.7010108@draigBrady.com>
2010-04-14 13:13   ` David Howells [this message]
  -- strict thread matches above, loose matches on Subject: below --
2010-01-13 19:39 David Howells
2010-01-13 20:15 ` Geert Uytterhoeven
2010-01-13 21:59   ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=18695.1271250815@redhat.com \
    --to=dhowells@redhat.com \
    --cc=P@draigBrady.com \
    --cc=arjan@infradead.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthew@wil.cx \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.