All of lore.kernel.org
 help / color / mirror / Atom feed
* [parisc-linux] uniline ?
@ 2004-06-06 19:22 Joel Soete
       [not found] ` <200407310929.29022.mszick@goquest.com>
  0 siblings, 1 reply; 3+ messages in thread
From: Joel Soete @ 2004-06-06 19:22 UTC (permalink / raw)
  To: parisc-linux

Hello all,

reading this:
ChangeSet 2004/06/04 16:17:51-07:00, akpm @ osdl.org [diffview]

[PATCH] ia64: uninline find_next_bit on ia64

From: Paul Jackson <pj@sgi.com>

Move the page of code (~700 bytes of instructions) for find_next_bit and
find_next_zero_bit from inline in include/asm-ia64/bitops.h to a real function
in arch/ia64/lib/bitops.c, leaving a declaration and macro wrapper behind.

The other arch's with almost this same code might want to also uninline it:
alpha, parisc, ppc, sh, sparc, sparc64.

These are too big to inline.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David Mosberger <davidm@hpl.hp.com>

I plan to make an equivalent patch for parisc:
diff -Naur linux-2.6.a/arch/parisc/lib/bitops.c linux-2.6.b/arch/parisc/lib/bitops.c
--- linux-2.6.a/arch/parisc/lib/bitops.c        2004-06-06 15:16:28.000000000 +0200
+++ linux-2.6.b/arch/parisc/lib/bitops.c        2004-06-06 19:53:09.312540000 +0200
@@ -11,6 +11,7 @@
  #include <linux/spinlock.h>
  #include <asm/system.h>
  #include <asm/atomic.h>
+#include <asm/bitops.h>

  #ifdef CONFIG_SMP
  atomic_lock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = {
@@ -82,3 +83,126 @@
         atomic_spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags);
         return (unsigned long)prev;
  }
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ */
+unsigned long __find_next_zero_bit(void * addr, unsigned long size, unsigned long offset)
+{
+       unsigned long * p = ((unsigned long *) addr) + (offset >> SHIFT_PER_LONG);
+       unsigned long result = offset & ~(BITS_PER_LONG-1);
+       unsigned long tmp;
+
+       if (offset >= size)
+               return size;
+       size -= result;
+       offset &= (BITS_PER_LONG-1);
+       if (offset) {
+               tmp = *(p++);
+               tmp |= ~0UL >> (BITS_PER_LONG-offset);
+               if (size < BITS_PER_LONG)
+                       goto found_first;
+               if (~tmp)
+                       goto found_middle;
+               size -= BITS_PER_LONG;
+               result += BITS_PER_LONG;
+       }
+       while (size & ~(BITS_PER_LONG -1)) {
+               if (~(tmp = *(p++)))
+                       goto found_middle;
+               result += BITS_PER_LONG;
+               size -= BITS_PER_LONG;
+       }
+       if (!size)
+               return result;
+       tmp = *p;
+found_first:
+       tmp |= ~0UL << size;
+found_middle:
+       return result + ffz(tmp);
+}
+EXPORT_SYMBOL(__find_next_zero_bit);
+
+
+unsigned long __find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset)
+{
+       unsigned long *p = addr + (offset >> 6);
+       unsigned long result = offset & ~(BITS_PER_LONG-1);
+       unsigned long tmp;
+
+       if (offset >= size)
+               return size;
+       size -= result;
+       offset &= (BITS_PER_LONG-1);
+       if (offset) {
+               tmp = *(p++);
+               tmp &= (~0UL << offset);
+               if (size < BITS_PER_LONG)
+                       goto found_first;
+               if (tmp)
+                       goto found_middle;
+               size -= BITS_PER_LONG;
+               result += BITS_PER_LONG;
+       }
+       while (size & ~(BITS_PER_LONG-1)) {
+               if ((tmp = *(p++)))
+                       goto found_middle;
+               result += BITS_PER_LONG;
+               size -= BITS_PER_LONG;
+       }
+       if (!size)
+               return result;
+       tmp = *p;
+
+found_first:
+       tmp &= (~0UL >> (BITS_PER_LONG - size));
+       if (tmp == 0UL)        /* Are any bits set? */
+               return result + size; /* Nope. */
+found_middle:
+       return result + __ffs(tmp);
+}
+EXPORT_SYMBOL(__find_next_bit);
+
+/*
+ * This implementation of ext2_find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h and modified for a big-endian machine.
+ */
+
+extern unsigned long __ext2_find_next_zero_bit(void *addr,
+               unsigned long size, unsigned long offset)
+{
+       unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
+       unsigned int result = offset & ~31UL;
+       unsigned int tmp;
+
+       if (offset >= size)
+               return size;
+       size -= result;
+       offset &= 31UL;
+       if (offset) {
+               tmp = cpu_to_le32p(p++);
+               tmp |= ~0UL >> (32-offset);
+               if (size < 32)
+                       goto found_first;
+               if (tmp != ~0U)
+                       goto found_middle;
+               size -= 32;
+               result += 32;
+       }
+       while (size >= 32) {
+               if ((tmp = cpu_to_le32p(p++)) != ~0U)
+                       goto found_middle;
+               result += 32;
+               size -= 32;
+       }
+       if (!size)
+               return result;
+       tmp = cpu_to_le32p(p);
+found_first:
+       tmp |= ~0U << size;
+found_middle:
+       return result + ffz(tmp);
+}
+EXPORT_SYMBOL(__ext2_find_next_zero_bit);
+
diff -Naur linux-2.6.a/include/asm-parisc/bitops.h linux-2.6.b/include/asm-parisc/bitops.h
--- linux-2.6.a/include/asm-parisc/bitops.h     2004-06-06 15:17:31.000000000 +0200
+++ linux-2.6.b/include/asm-parisc/bitops.h     2004-06-06 19:53:20.232540000 +0200
@@ -354,78 +354,15 @@
  #define find_first_zero_bit(addr, size) \
         find_next_zero_bit((addr), (size), 0)

-static __inline__ unsigned long find_next_zero_bit(void * addr, unsigned long size, unsigned long offset)
-{
-       unsigned long * p = ((unsigned long *) addr) + (offset >> SHIFT_PER_LONG);
-       unsigned long result = offset & ~(BITS_PER_LONG-1);
-
-       if (offset >= size)
-               return size;
-       size -= result;
-       offset &= (BITS_PER_LONG-1);
-       if (offset) {
-               tmp = *(p++);
-               tmp |= ~0UL >> (BITS_PER_LONG-offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (~tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-       while (size & ~(BITS_PER_LONG -1)) {
-               if (~(tmp = *(p++)))
-                       goto found_middle;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = *p;
-found_first:
-       tmp |= ~0UL << size;
-found_middle:
-       return result + ffz(tmp);
-}
+#define find_next_zero_bit(addr, size, offset) \
+               __find_next_zero_bit((addr), (size), (offset))

-static __inline__ unsigned long find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset)
-{
-       unsigned long *p = addr + (offset >> 6);
-       unsigned long result = offset & ~(BITS_PER_LONG-1);
-       unsigned long tmp;
-
-       if (offset >= size)
-               return size;
-       size -= result;
-       offset &= (BITS_PER_LONG-1);
-       if (offset) {
-               tmp = *(p++);
-               tmp &= (~0UL << offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-       while (size & ~(BITS_PER_LONG-1)) {
-               if ((tmp = *(p++)))
-                       goto found_middle;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = *p;
-
-found_first:
-       tmp &= (~0UL >> (BITS_PER_LONG - size));
-       if (tmp == 0UL)        /* Are any bits set? */
-               return result + size; /* Nope. */
-found_middle:
-       return result + __ffs(tmp);
-}
+extern unsigned long __find_next_zero_bit(void * addr, unsigned long size, unsigned long offset);
+
+#define find_next_bit(addr, size, offset) \
+               __find_next_bit((addr), (size), (offset))
+
+extern unsigned long __find_next_bit(unsigned long *addr, unsigned long size, unsigned long offset);

  /**
   * find_first_bit - find the first set bit in a memory region
@@ -474,41 +411,10 @@
  #define ext2_find_first_zero_bit(addr, size) \
          ext2_find_next_zero_bit((addr), (size), 0)

-extern __inline__ unsigned long ext2_find_next_zero_bit(void *addr,
-       unsigned long size, unsigned long offset)
-{
-       unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
-       unsigned int result = offset & ~31UL;
-       unsigned int tmp;
-
-       if (offset >= size)
-               return size;
-       size -= result;
-       offset &= 31UL;
-       if (offset) {
-               tmp = cpu_to_le32p(p++);
-               tmp |= ~0UL >> (32-offset);
-               if (size < 32)
-                       goto found_first;
-               if (tmp != ~0U)
-                       goto found_middle;
-               size -= 32;
-               result += 32;
-       }
-       while (size >= 32) {
-               if ((tmp = cpu_to_le32p(p++)) != ~0U)
-                       goto found_middle;
-               result += 32;
-               size -= 32;
-       }
-       if (!size)
-               return result;
-       tmp = cpu_to_le32p(p);
-found_first:
-       tmp |= ~0U << size;
-found_middle:
-       return result + ffz(tmp);
-}
+#define ext2_find_next_zero_bit(addr, size, offset) \
+               __ext2_find_next_zero_bit((addr), (size), (offset))
+
+extern unsigned long __ext2_find_next_zero_bit(void *addr, unsigned long size, unsigned long offset);

  /* Bitmap functions for the minix filesystem.  */
  #define minix_test_and_set_bit(nr,addr) ext2_set_bit(nr,addr)
========><======

What's your opinion about its relevance for parisc?

hth,
	Joel
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [parisc-linux] uninline in bitops.c as ia64 or sparc64?
       [not found]   ` <1091293141.1920.34.camel@mulgrave>
@ 2004-08-09 14:54     ` Michael S. Zick
  2004-08-09 17:15       ` Michael S. Zick
  0 siblings, 1 reply; 3+ messages in thread
From: Michael S. Zick @ 2004-08-09 14:54 UTC (permalink / raw)
  To: James Bottomley; +Cc: PARISC list

[-- Attachment #1: Type: text/plain, Size: 3351 bytes --]

On Sat July 31 2004 11:58, James Bottomley wrote:
> On Sat, 2004-07-31 at 10:29, Michael S. Zick wrote:
> > How about a different algorithm?
> > Say:: 3 * Log_2 (Register Size)
> 
> This algorithm is a more complex form of generic_ffz() in linux/bitops.h
> 
> perhaps we should use generic_ffz as the basis for this?
> 
generic_ffz is defined as integer - is 'integer' the same size on
cpu32 and cpu64?  If not, that routine needs a size-conditional
test for the other 32 bits on cpu64.

Here is a suggested fix for include/asm-parisc/bitops.h -

NOTE 1: I have no way to test this - someone needs to check my
work.

NOTE 2: I did try to do this on (electronic) pencil and paper -
You can get the pictures at: <www.goquest.com/~mszick>
as: hpbitfinder.ps.bz2

NOTE 3: Same diff is attached as in-lined, just in case my
mail agent trashes it.

Mike

Index: include/asm-parisc/bitops.h
===================================================================
RCS file: /opt/lib/cvs/parisc/include/asm-parisc/bitops.h,v
retrieving revision 1.1.1.1
diff -u -3 -p -r1.1.1.1 bitops.h
--- a/include/asm-parisc/bitops.h	13 Jul 2004 05:40:29 -0000	1.1.1.1
+++ b/include/asm-parisc/bitops.h	9 Aug 2004 14:33:03 -0000
@@ -217,11 +217,16 @@ static __inline__ int test_bit(int nr, c
  *
  * This algorithm avoids branches by making use of nullification.
  * One side effect of "extr" instructions is it sets PSW[N] bit.
- * How PSW[N] (nullify next insn) gets set is determined by the
+ * How PSW[N] (nullify next insn) gets set is determined by the
  * "condition" field (eg "<>" or "TR" below) in the extr* insn.
  * Only the 1st and one of either the 2cd or 3rd insn will get executed.
  * Each set of 3 insn will get executed in 2 cycles on PA8x00 vs 16 or so
  * cycles for each mispredicted branch.
+ *
+ * Provide either a u64 or u32 version based on cpu register size.
+ * Note that the hard coded field indexes depend on cpu register size;
+ * and the extract instruction generates a right-justified result.
+ * The index numbers are hp-bit-position not radix 2 numbers. <msz>
  */

 static __inline__ unsigned long __ffs(unsigned long x)
@@ -229,14 +234,27 @@ static __inline__ unsigned long __ffs(un
 	unsigned long ret;

 	__asm__(
-#if BITS_PER_LONG > 32
+#ifdef __LP64__
 		" ldi       63,%1\n"
 		" extrd,u,*<>  %0,63,32,%%r0\n"
 		" extrd,u,*TR  %0,31,32,%0\n"	/* move top 32-bits down */
 		" addi    -32,%1,%1\n"
+		" extrd,u,*<>  %0,63,16,%%r0\n"
+		" extrd,u,*TR  %0,47,16,%0\n"   /* xxxx0000 -> 0000xxxx */
+		" addi    -16,%1,%1\n"
+		" extrd,u,*<>  %0,63,8,%%r0\n"
+		" extrd,u,*TR  %0,55,8,%0\n"    /* 0000xx00 -> 000000xx */
+		" addi    -8,%1,%1\n"
+		" extrd,u,*<>  %0,63,4,%%r0\n"
+		" extrd,u,*TR  %0,59,4,%0\n"    /* 000000x0 -> 0000000x */
+		" addi    -4,%1,%1\n"
+		" extrd,u,*<>  %0,63,2,%%r0\n"
+		" extrd,u,*TR  %0,61,2,%0\n"    /* 0000000y, 1100b -> 0011b */
+		" addi    -2,%1,%1\n"
+		" extrd,u,*=  %0,63,1,%%r0\n"   /* check last bit */
+		" addi    -1,%1,%1\n"
 #else
 		" ldi       31,%1\n"
-#endif
 		" extru,<>  %0,31,16,%%r0\n"
 		" extru,TR  %0,15,16,%0\n"	/* xxxx0000 -> 0000xxxx */
 		" addi    -16,%1,%1\n"
@@ -251,6 +269,7 @@ static __inline__ unsigned long __ffs(un
 		" addi    -2,%1,%1\n"
 		" extru,=  %0,31,1,%%r0\n"	/* check last bit */
 		" addi    -1,%1,%1\n"
+#endif
 			: "+r" (x), "=r" (ret) );
 	return ret;
 }



[-- Attachment #2: bitops.h.diff.bz2 --]
[-- Type: application/x-bzip2, Size: 1171 bytes --]

[-- Attachment #3: Type: text/plain, Size: 169 bytes --]

_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [parisc-linux] uninline in bitops.c as ia64 or sparc64?
  2004-08-09 14:54     ` [parisc-linux] uninline in bitops.c as ia64 or sparc64? Michael S. Zick
@ 2004-08-09 17:15       ` Michael S. Zick
  0 siblings, 0 replies; 3+ messages in thread
From: Michael S. Zick @ 2004-08-09 17:15 UTC (permalink / raw)
  To: parisc-linux

On Mon August 9 2004 09:54, Michael S. Zick wrote:
>
-------
> 
> Mike
> 
> Index: include/asm-parisc/bitops.h
> ===================================================================
> RCS file: /opt/lib/cvs/parisc/include/asm-parisc/bitops.h,v
> retrieving revision 1.1.1.1
> diff -u -3 -p -r1.1.1.1 bitops.h
Sorry for the self-reply...

If compiled with a 32-bit compiler, the NOT(__LP64__) path will compile.
If the resultant program is loaded on a 64-bit machine running 'narrow mode',
it will fail, since the mode bit only changes the addressing, not the
bit-position numbering.(1)

Translation:
As submitted, it should compile for 64-bit and run 64-Wide;
it should compile for 32-bit and run 32-bit.
It is only the case of compiling for 32-bit and then trying to
run it on a 64-bit machine in narrow mode that should fail.

I'll work on this - I probably will abuse the shift-right-double
instruction to come up with a compiles-correctly, runs-anywhere
version.

Two other problems I am considering:

1) It only beats on two (not counting GR0) registers;
it will probably cause register stalls on pipelined machines.

2) It is bigger than a cache line, probably executes in less
than a memory cycle - so it will probably 'starve' the cpu;
waiting for the next memory cycle to fill the next i-cache line.

Taken all together - an interesting problem.

Mike
(1) Does 32-bit gcc ever generate extract/deposit? This might
cause problems running anything 32-bit on 64-bit-narrow.
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2004-08-09 17:15 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-06-06 19:22 [parisc-linux] uniline ? Joel Soete
     [not found] ` <200407310929.29022.mszick@goquest.com>
     [not found]   ` <1091293141.1920.34.camel@mulgrave>
2004-08-09 14:54     ` [parisc-linux] uninline in bitops.c as ia64 or sparc64? Michael S. Zick
2004-08-09 17:15       ` Michael S. Zick

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.