All of lore.kernel.org
 help / color / mirror / Atom feed
From: Grant Grundler <grundler@parisc-linux.org>
To: parisc-linux@lists.parisc-linux.org
Subject: Re: [parisc-linux] ext3 perf patch#4
Date: Sat, 26 Mar 2005 18:18:49 -0700	[thread overview]
Message-ID: <20050327011849.GD9287@colo.lackof.org> (raw)
In-Reply-To: <20050326103540.GA31557@colo.lackof.org>

On Sat, Mar 26, 2005 at 03:35:40AM -0700, Grant Grundler wrote:
> James Bottomley in a private conversation suggested our
> ext2_test_bit() and ext2_find_next_zero_bit() might not
> be counting bits the same way in the bitmap.

James, kudos and thank you.

With appended patch, I'm getting reasonable ext3 performance.
Can someone please test this patch in a 32-bit kernel?

I'm comfortable this patch works for 64-bit since I've been building
kernels and running rsync across 3 disks with no problems.
I *think* the code is 32-bit clean as well, but haven't tested it.


Some minor additional, interesting things folks can do:
o Should this function should really be "static inline"?
  sparc64 puts it into arch/sparc64/lib :
  grundler <502>fgrep find_next_zero_le arch/sparc64/lib/find_bit.c 
  unsigned long find_next_zero_le_bit(unsigned long *addr, unsigned long size, unsigned long offset)

  How often is it called? More than three/four times would make
  it candidate for lib.

o ffz() is expanded inline. Restructure code so we don't
  need find_middle_swap() label. Ie do endian swap inside
  the conditional...but then the loop gets uglier and
  parisc can't use a conditional branch.
  Making this a normal function might make this item irrelevant.

o drop or optimize "if (tmp == ~0UL)" right before found_middle label.
  ffz() isn't that expensive compared to a mis-predicted branch.
  Figure out how many cycles ffz() costs, how many cycles a
  mispredicted branch costs, and then what the right tradeoff
  is in this case (including reduced code size).

Simple test results follow and then the patch.

riot:/mnt3# df
Filesystem           1K-blocks      Used Available Use% Mounted on
/dev/sdk3             16146848   1649084  13677540  11% /
/dev/sdk1               189387     97070     82539  55% /boot
/dev/sdb              17504036  14616368   1998508  88% /mnt3
/dev/sdu              17504036  14616368   1998508  88% /mnt2
/dev/sdv              17504036   5128996  11485880  31% /mnt

grundler@riot:/$ lsscsi
[0:0:0:0]    disk    SEAGATE  ST318203LC       HP02  /dev/sda
[0:0:1:0]    disk    SEAGATE  ST318203LC       HP02  /dev/sdb
...
[5:0:12:0]   disk    SEAGATE  ST318451LC       HP01  /dev/sdu
[5:0:13:0]   disk    SEAGATE  ST318451LC       HP01  /dev/sdv


riot:/mnt# time tar xjf linux-2.6.12-rc1-pa3.tar.bz2 

real    0m51.988s
user    0m44.175s
sys     0m7.912s

riot:/mnt# cd ../mnt2
riot:/mnt2# time tar xjf /mnt/linux-2.6.12-rc1-pa3.tar.bz2 

real    0m52.338s
user    0m44.232s
sys     0m7.912s
riot:/mnt2# cd /mnt3  
riot:/mnt3# time tar xjf /mnt/linux-2.6.12-rc1-pa3.tar.bz2 

real    0m54.755s
user    0m44.181s
sys     0m7.998s


iostat 10 second sample while running tar:

avg-cpu:  %user   %nice    %sys %iowait   %idle
          43.73    0.00   11.14    9.95   35.18

Device:            tps   Blk_read/s   Blk_wrtn/s   Blk_read   Blk_wrtn
sdb               0.00         0.00         0.00          0          0
sdu               0.00         0.00         0.00          0          0
sdv             130.87         2.40     11723.48         24     117352


thanks,
grant


Signed-off-by: Grant Grundler <grundler@parisc-linux.org>


Index: include/asm-parisc/bitops.h
===================================================================
RCS file: /var/cvs/linux-2.6/include/asm-parisc/bitops.h,v
retrieving revision 1.15
diff -u -p -r1.15 bitops.h
--- include/asm-parisc/bitops.h	24 Mar 2005 15:26:29 -0000	1.15
+++ include/asm-parisc/bitops.h	27 Mar 2005 01:09:41 -0000
@@ -466,52 +466,67 @@ static __inline__ int ext2_test_bit(int 
 	return (ADDR[nr >> 3] >> (nr & 7)) & 1;
 }
 
-/*
- * This implementation of ext2_find_{first,next}_zero_bit was stolen from
- * Linus' asm-alpha/bitops.h and modified for a big-endian machine.
- */
+/* include/linux/byteorder does not support "unsigned long" type */
+static inline unsigned long ext2_swabp(unsigned long * x)
+{
+#ifdef CONFIG_64BIT
+	return (unsigned long) __swab64p((u64 *) x);
+#else
+	return (unsigned long) __swab32p((u32 *) x);
+#endif
+}
 
-#define ext2_find_first_zero_bit(addr, size) \
-        ext2_find_next_zero_bit((addr), (size), 0)
+/* include/linux/byteorder doesn't support "unsigned long" type */
+static inline unsigned long ext2_swab(unsigned long y)
+{
+#ifdef CONFIG_64BIT
+	return (unsigned long) __swab64((u64) y);
+#else
+	return (unsigned long) __swab32((u32) y);
+#endif
+}
 
-extern __inline__ unsigned long ext2_find_next_zero_bit(void *addr,
-	unsigned long size, unsigned long offset)
+static __inline__ unsigned long ext2_find_next_zero_bit(unsigned long *addr, unsigned long size, unsigned long offset)
 {
-	unsigned int *p = ((unsigned int *) addr) + (offset >> 5);
-	unsigned int result = offset & ~31UL;
-	unsigned int tmp;
+	unsigned long *p = addr + (offset >> SHIFT_PER_LONG);
+	unsigned long result = offset & ~(BITS_PER_LONG - 1);
+	unsigned long tmp;
 
 	if (offset >= size)
 		return size;
 	size -= result;
-	offset &= 31UL;
-	if (offset) {
-		tmp = cpu_to_le32p(p++);
-		tmp |= ~0UL >> (32-offset);
-		if (size < 32)
+	offset &= (BITS_PER_LONG - 1UL);
+	if(offset) {
+		tmp = ext2_swabp(p++);
+		tmp |= (~0UL >> (BITS_PER_LONG - offset));
+		if(size < BITS_PER_LONG)
 			goto found_first;
-		if (tmp != ~0U)
+		if(~tmp)
 			goto found_middle;
-		size -= 32;
-		result += 32;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
 	}
-	while (size >= 32) {
-		if ((tmp = *p++) != ~0U)
+
+	while(size & ~(BITS_PER_LONG - 1)) {
+		if(~(tmp = *(p++)))
 			goto found_middle_swap;
-		result += 32;
-		size -= 32;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
 	}
-	if (!size)
+	if(!size)
 		return result;
-	tmp = cpu_to_le32p(p);
+	tmp = ext2_swabp(p);
 found_first:
-	tmp |= ~0U << size;
+	tmp |= ~0UL << size;
+	if (tmp == ~0UL)	/* Are any bits zero? */
+		return result + size; /* Nope. Skip ffz */
 found_middle:
 	return result + ffz(tmp);
+
 found_middle_swap:
-	tmp = cpu_to_le32(tmp);
-	return result + ffz(tmp);
+	return result + ffz(ext2_swab(tmp));
 }
+
 
 /* Bitmap functions for the minix filesystem.  */
 #define minix_test_and_set_bit(nr,addr) ext2_set_bit(nr,addr)
_______________________________________________
parisc-linux mailing list
parisc-linux@lists.parisc-linux.org
http://lists.parisc-linux.org/mailman/listinfo/parisc-linux

  parent reply	other threads:[~2005-03-27  1:18 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-03-26 10:35 [parisc-linux] ext3 perf patch#2 Grant Grundler
     [not found] ` <42454E7C.3050509@tiscali.be>
2005-03-26 21:39   ` Grant Grundler
2005-03-27  1:18 ` Grant Grundler [this message]
2005-03-27 13:49   ` [parisc-linux] ext3 perf patch#4 Joel Soete
2005-03-28  3:48     ` Grant Grundler
2005-03-28 12:51       ` Joel Soete

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050327011849.GD9287@colo.lackof.org \
    --to=grundler@parisc-linux.org \
    --cc=parisc-linux@lists.parisc-linux.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.