public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: Daniel Walker <dwalker@mvista.com>
Cc: Ingo Molnar <mingo@elte.hu>, Andrew Morton <akpm@osdl.org>,
	Linus Torvalds <torvalds@osdl.org>,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [RFC][PATCH] Make MAX_RT_PRIO and MAX_USER_RT_PRIO configurable
Date: Wed, 27 Jul 2005 23:32:18 -0400	[thread overview]
Message-ID: <1122521538.29823.177.camel@localhost.localdomain> (raw)
In-Reply-To: <1122519999.29823.165.camel@localhost.localdomain>

[-- Attachment #1: Type: text/plain, Size: 2432 bytes --]

New benchmarks with my own formula.

I added my own find_first_bit algorithm, that still doesn't quite beat
Ingo's sched_find_first_bit, but it does a number on the generic
find_first_bit.  I guess the compiler has finally beat what we do in
asm.  Here's my generic algorithm.

static inline int my_find_first_bit(const unsigned long *b, unsigned size)
{
        int x = 0;
        size += 31;
        do {
                if (unlikely(*b))
                        return __ffs(*b) + x;
                b++;
                x += 32;
        } while (x < size);
        return x-32;
}


And here's the new results of my benchmark: Comments added.

/*
 * I put in what is returned to see what really is returned is
 * correct.  Funny that sched_find_first_bit is wrong if there
 * isn't a bit set. But this shouldn't be called if there isn't
 * one set.
 */
sched=128  ffb=160  my=160
clock speed = 00000000:7f2f2cbb 2133798075 ticks per second
/* Here I set the last bit of 5 32bit words. */
sched=159  ffb=159  my=159
last bit set
generic ffb: 00000000:026a1cea
time: 0.018984294us
sched ffb: 00000000:001ee719
time: 0.000949125us
/*
 * My time is 8ns, 8 times longer than Ingo's if the last
 * bit is set (most likely case), but still more than twice 
 * as fast as ffb.
 */
my ffb: 00000000:0106a3dd
time: 0.008066546us
sched=0  ffb=0  my=0

first bit set
generic ffb: 00000000:01ee8e2b
time: 0.015189432us
sched ffb: 00000000:001eea92
time: 0.000949542us
/*
 * With the first bit set, I'm still slower than Ingo's
 * but only by 2, and I still beat the pants off of ffb.
 */
my ffb: 00000000:004d5de6
time: 0.002376190us


Conclusion:  it looks like it might be time to change the i386 generic
ffb to something else.

Oh, and if you are wondering...


$ gcc -v
Using built-in specs.
Target: i486-linux-gnu
Configured with: ../src/configure -v --enable-languages=c,c
++,java,f95,objc,ada,treelang --prefix=/usr --enable-shared
--with-system-zlib --libexecdir=/usr/lib --enable-nls
--without-included-gettext --enable-threads=posix --program-suffix=-4.0
--enable-__cxa_atexit --enable-libstdcxx-allocator=mt
--enable-clocale=gnu --enable-libstdcxx-debug --enable-java-gc=boehm
--enable-java-awt=gtk
--with-java-home=/usr/lib/jvm/java-1.4.2-gcj-4.0-1.4.2.0/jre
--enable-mpfr --disable-werror --enable-checking=release i486-linux-gnu
Thread model: posix
gcc version 4.0.1 (Debian 4.0.1-2)

-- Steve

Attached is new version of ffb.c

[-- Attachment #2: ffb.c --]
[-- Type: text/x-csrc, Size: 3385 bytes --]

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define unlikely(x)	__builtin_expect(!!(x), 0)

static inline int find_first_bit(const unsigned long *addr, unsigned size)
{
	int d0, d1;
	int res;

	/* This looks at memory. Mark it volatile to tell gcc not to move it around */
	__asm__ __volatile__(
		"xorl %%eax,%%eax\n\t"
		"repe; scasl\n\t"
		"jz 1f\n\t"
		"leal -4(%%edi),%%edi\n\t"
		"bsfl (%%edi),%%eax\n"
		"1:\tsubl %%ebx,%%edi\n\t"
		"shll $3,%%edi\n\t"
		"addl %%edi,%%eax"
		:"=a" (res), "=&c" (d0), "=&D" (d1)
		:"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
	return res;
}

static inline unsigned long __ffs(unsigned long word)
{
	__asm__("bsfl %1,%0"
		:"=r" (word)
		:"rm" (word));
	return word;
}

static inline int sched_find_first_bit(const unsigned long *b)
{
	if (unlikely(b[0]))
		return __ffs(b[0]);
	if (unlikely(b[1]))
		return __ffs(b[1]) + 32;
	if (unlikely(b[2]))
		return __ffs(b[2]) + 64;
	if (b[3])
		return __ffs(b[3]) + 96;
	return __ffs(b[4]) + 128;
}

static inline int my_find_first_bit(const unsigned long *b, unsigned size)
{
	int x = 0;
	size += 31;
	do {
		if (unlikely(*b))
			return __ffs(*b) + x;
		b++;
		x += 32;
	} while (x < size);
	return x-32;
}

#define rdtscll(val) \
	     __asm__ __volatile__("rdtsc" : "=A" (val))

#define rdtsc(low,high) \
	     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))

static unsigned long array[5];

#define ITER 1000000 /* 1,000,000 times */

void testit(unsigned long *array, unsigned long long clock)
{
	unsigned long long s;
	unsigned long long e;
	unsigned long long t;
	double f;
	int i;
	int x;
	
	/*
	 * Since ITER is 1,000,000 the times will be in us.
	 */

	rdtscll(s);
	for (i=0; i < ITER; i++) 
		x = find_first_bit(array,140);
	rdtscll(e);
	t = e - s;
	f = (float)t / (float)clock;
	printf("generic ffb: %08lx:%08lx\n",
			(unsigned long)(t>>32),(unsigned long)t);
	printf("time: %.09fus\n",f);

	rdtscll(s);
	for (i=0; i < ITER; i++) 
		x = sched_find_first_bit(array);
	rdtscll(e);
	t = e - s;
	f = (float)t / (float)clock;
	printf("sched ffb: %08lx:%08lx\n",
			(unsigned long)(t>>32),(unsigned long)t);
	printf("time: %.09fus\n",f);

	rdtscll(s);
	for (i=0; i < ITER; i++) 
		x = my_find_first_bit(array,140);
	rdtscll(e);
	t = e - s;
	f = (float)t / (float)clock;
	printf("my ffb: %08lx:%08lx\n",
			(unsigned long)(t>>32),(unsigned long)t);
	printf("time: %.09fus\n",f);

}

int main(int argc, char **argv)
{
	unsigned long long s;
	unsigned long long e;
	unsigned long long t;
	unsigned long long clock;

	printf("sched=%d  ffb=%d  my=%d\n",
			sched_find_first_bit(array),
			find_first_bit(array,140),
			my_find_first_bit(array,140));
	/*
	 * Calculate BS time, just to get an 
	 * idea of the tsc speed.
	 */
	rdtscll(s);
	sleep(8);
	rdtscll(e);
	
	t = e - s;
	t >>= 3;
	printf("clock speed = %08lx:%08lx %llu ticks per second\n",
			(unsigned long)(t>>32),(unsigned long)t,
			t);
	clock = t;

	array[4] = 0x80000000;
	printf("sched=%d  ffb=%d  my=%d\n",
			sched_find_first_bit(array),
			find_first_bit(array,140),
			my_find_first_bit(array,140));
	printf("last bit set\n");
	testit(array,clock);

	array[0] = 0x00000001;
	printf("sched=%d  ffb=%d  my=%d\n",
			sched_find_first_bit(array),
			find_first_bit(array,140),
			my_find_first_bit(array,140));
	printf("\nfirst bit set\n");
	testit(array,clock);
	
	exit(0);
}

  reply	other threads:[~2005-07-28  3:32 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-07-27 14:13 [RFC][PATCH] Make MAX_RT_PRIO and MAX_USER_RT_PRIO configurable Steven Rostedt
2005-07-27 14:17 ` Ingo Molnar
2005-07-27 14:26   ` Steven Rostedt
2005-07-27 14:33     ` Ingo Molnar
2005-07-27 14:47       ` [PATCH] safty check of MAX_RT_PRIO >= MAX_USER_RT_PRIO Steven Rostedt
2005-07-27 15:05         ` Steven Rostedt
2005-07-27 18:52         ` Ingo Molnar
2005-07-27 14:53       ` [RFC][PATCH] Make MAX_RT_PRIO and MAX_USER_RT_PRIO configurable Esben Nielsen
2005-07-27 15:02         ` Steven Rostedt
2005-07-27 16:09         ` K.R. Foley
2005-07-27 17:01           ` Esben Nielsen
2005-07-27 17:25             ` Steven Rostedt
2005-07-27 21:32               ` Esben Nielsen
2005-07-28 12:17                 ` Steven Rostedt
2005-07-28  7:22               ` Ingo Molnar
2005-07-28 11:53                 ` Steven Rostedt
2005-07-27 17:42             ` K.R. Foley
2005-07-28  9:59               ` Esben Nielsen
2005-07-27 14:28   ` Steven Rostedt
2005-07-27 14:38     ` Ingo Molnar
2005-07-27 14:46       ` Steven Rostedt
2005-07-28  7:33         ` Ingo Molnar
2005-07-28  1:42   ` Matt Mackall
2005-07-28  1:00 ` Daniel Walker
2005-07-28  1:20   ` Lee Revell
2005-07-28  1:26     ` Steven Rostedt
2005-07-28  1:25   ` Steven Rostedt
2005-07-28  3:06     ` Steven Rostedt
2005-07-28  3:32       ` Steven Rostedt [this message]
2005-07-28  3:45         ` Steven Rostedt
2005-07-28  3:51           ` Nick Piggin
2005-07-28 11:43             ` [PATCH] speed up on find_first_bit for i386 (let compiler do the work) Steven Rostedt
2005-07-28 12:45               ` Steven Rostedt
2005-07-28 15:31                 ` Linus Torvalds
2005-07-28 15:30               ` Linus Torvalds
2005-07-28 15:47                 ` Steven Rostedt
2005-07-28 16:34                   ` Maciej W. Rozycki
2005-07-28 16:57                     ` Steven Rostedt
2005-07-28 17:25                       ` Linus Torvalds
2005-07-29 10:03                         ` David Woodhouse
2005-07-29 14:41                           ` Maciej W. Rozycki
2005-07-29 16:23                           ` Linus Torvalds
2005-07-29 14:39                         ` Maciej W. Rozycki
2005-07-29 16:29                           ` Linus Torvalds
2005-07-29 17:14                             ` Maciej W. Rozycki
2005-07-28 17:17                     ` Linus Torvalds
2005-07-29 15:09                       ` Maciej W. Rozycki
2005-07-28 18:25                     ` Steven Rostedt
2005-07-28 18:56                       ` Linus Torvalds
2005-07-28 17:52               ` Mitchell Blank Jr

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1122521538.29823.177.camel@localhost.localdomain \
    --to=rostedt@goodmis.org \
    --cc=akpm@osdl.org \
    --cc=dwalker@mvista.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox