From: Steven Whitehouse
Date: Tue, 11 Mar 2008 10:33:50 +0000
Subject: [Cluster-devel] [GFS2] Faster gfs2_bitfit algorithm
In-Reply-To: <1205191067.2873.58.camel@technetium.msp.redhat.com>
References: <1205191067.2873.58.camel@technetium.msp.redhat.com>
Message-ID: <1205231630.3358.3.camel@localhost.localdomain>
To: cluster-devel.redhat.com

Hi,

That's a worthwhile improvement. Now in the -nmw git tree.

Thanks,

Steve.

On Mon, 2008-03-10 at 18:17 -0500, Bob Peterson wrote:
> Hi,
>
> This version of the gfs2_bitfit algorithm includes the latest
> suggestions from Steve Whitehouse. It is typically eight to
> ten times faster than the version we're using today. If there
> is a lot of metadata mixed in (lots of small files), the
> algorithm is often 15 times faster, and given the right
> conditions, I've seen peaks of 20 times faster.
>
> Regards,
>
> Bob Peterson
>
> Signed-off-by: Bob Peterson
> --
>  fs/gfs2/rgrp.c |   93 ++++++++++++++++++++++++++++++++++++-------------------
>  1 files changed, 61 insertions(+), 32 deletions(-)
>
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index 4291375..7e8f0b1 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -14,6 +14,7 @@
>  #include <linux/fs.h>
>  #include <linux/gfs2_ondisk.h>
>  #include <linux/lm_interface.h>
> +#include <linux/prefetch.h>
>
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -33,6 +34,16 @@
>  #define BFITNOENT ((u32)~0)
>  #define NO_BLOCK ((u64)~0)
>
> +#if BITS_PER_LONG == 32
> +#define LBITMASK   (0x55555555UL)
> +#define LBITSKIP55 (0x55555555UL)
> +#define LBITSKIP00 (0x00000000UL)
> +#else
> +#define LBITMASK   (0x5555555555555555UL)
> +#define LBITSKIP55 (0x5555555555555555UL)
> +#define LBITSKIP00 (0x0000000000000000UL)
> +#endif
> +
>  /*
>   * These routines are used by the resource group routines (rgrp.c)
>   * to keep track of block allocation.  Each block is represented by two
> @@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
>  static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
>  		       u8 old_state)
>  {
> -	const u8 *byte;
> -	u32 blk = goal;
> -	unsigned int bit, bitlong;
> -	const unsigned long *plong;
> -#if BITS_PER_LONG == 32
> -	const unsigned long plong55 = 0x55555555;
> -#else
> -	const unsigned long plong55 = 0x5555555555555555;
> -#endif
> -
> -	byte = buffer + (goal / GFS2_NBBY);
> -	plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY));
> -	bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
> -	bitlong = bit;
> -
> -	while (byte < buffer + buflen) {
> -
> -		if (bitlong == 0 && old_state == 0 && *plong == plong55) {
> -			plong++;
> -			byte += sizeof(unsigned long);
> -			blk += sizeof(unsigned long) * GFS2_NBBY;
> -			continue;
> +	const u8 *byte, *start, *end;
> +	int bit, startbit;
> +	u32 g1, g2, misaligned;
> +	unsigned long *plong;
> +	unsigned long lskipval;
> +
> +	lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
> +	g1 = (goal / GFS2_NBBY);
> +	start = buffer + g1;
> +	byte = start;
> +	end = buffer + buflen;
> +	g2 = ALIGN(g1, sizeof(unsigned long));
> +	plong = (unsigned long *)(buffer + g2);
> +	startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
> +	misaligned = g2 - g1;
> +	if (!misaligned)
> +		goto ulong_aligned;
> +/* parse the bitmap a byte at a time */
> +misaligned:
> +	while (byte < end) {
> +		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
> +			return goal +
> +				(((byte - start) * GFS2_NBBY) +
> +				 ((bit - startbit) >> 1));
>  		}
> -		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
> -			return blk;
>  		bit += GFS2_BIT_SIZE;
> -		if (bit >= 8) {
> +		if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
>  			bit = 0;
>  			byte++;
> +			misaligned--;
> +			if (!misaligned) {
> +				plong = (unsigned long *)byte;
> +				goto ulong_aligned;
> +			}
>  		}
> -		bitlong += GFS2_BIT_SIZE;
> -		if (bitlong >= sizeof(unsigned long) * 8) {
> -			bitlong = 0;
> -			plong++;
> -		}
> -
> -		blk++;
>  	}
> +	return BFITNOENT;
>
> +/* parse the bitmap an unsigned long at a time */
> +ulong_aligned:
> +	/* Stop at "end - 1" or else prefetch can go past the end and segfault.
> +	   We could "if" it but we'd lose some of the performance gained.
> +	   This way will only slow down searching the very last 4/8 bytes
> +	   depending on architecture.  I've experimented with several ways
> +	   of writing this section such as using an else before the goto
> +	   but this one seems to be the fastest. */
> +	while ((unsigned char *)plong < end - 1) {
> +		prefetch(plong + 1);
> +		if (((*plong) & LBITMASK) != lskipval)
> +			break;
> +		plong++;
> +	}
> +	if ((unsigned char *)plong < end) {
> +		byte = (const u8 *)plong;
> +		misaligned += sizeof(unsigned long) - 1;
> +		goto misaligned;
> +	}
>  	return BFITNOENT;
>  }
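
The core trick in the patch is that each block occupies two bits in the bitmap, so ANDing a whole unsigned long with the 0x5555... mask keeps only the low bit of every two-bit entry; if the masked word equals the all-ones or all-zeroes skip pattern, the wanted state cannot occur anywhere in that word and the whole word can be stepped over. Below is a rough, self-contained sketch of just that skip idea. It is not the kernel code: toy_bitfit, the BLKST_* values and the demo bitmap are invented for illustration, and the goal offset, unaligned-head handling and prefetching that the patch takes care of are left out.

/*
 * Standalone sketch (not GFS2 code): search a bitmap that stores one
 * 2-bit state per block, skipping a whole unsigned long whenever the
 * masked word shows the wanted state cannot be in it.
 */
#include <stdio.h>
#include <string.h>
#include <limits.h>

#define ENTRIES_PER_BYTE 4		/* four 2-bit entries per byte */
#define BIT_SIZE	 2
#define BIT_MASK	 0x3
#define NOENT		 ((unsigned int)~0)

#define BLKST_FREE	 0
#define BLKST_USED	 1

#if ULONG_MAX == 0xffffffffUL
#define LBITMASK 0x55555555UL		/* low bit of every 2-bit entry */
#define LSKIP55  0x55555555UL
#define LSKIP00  0x00000000UL
#else
#define LBITMASK 0x5555555555555555UL
#define LSKIP55  0x5555555555555555UL
#define LSKIP00  0x0000000000000000UL
#endif

/* Return the index of the first entry equal to old_state, or NOENT. */
static unsigned int toy_bitfit(const unsigned char *buf, unsigned int buflen,
			       unsigned int old_state)
{
	/*
	 * If the wanted state has its low bit set, a word whose masked low
	 * bits are all zero cannot contain it; if the low bit is clear, a
	 * word whose masked low bits are all ones cannot contain it.
	 * Either way such a word can be skipped in one comparison.
	 */
	unsigned long skip = (old_state & BLKST_USED) ? LSKIP00 : LSKIP55;
	unsigned int byte = 0;
	unsigned int bit;

	/* Word-at-a-time skip over regions that cannot hold old_state. */
	while (byte + sizeof(unsigned long) <= buflen) {
		unsigned long word;

		memcpy(&word, buf + byte, sizeof(word));
		if ((word & LBITMASK) != skip)
			break;
		byte += sizeof(word);
	}

	/* Byte-at-a-time scan of the word we stopped on and the tail. */
	for (; byte < buflen; byte++) {
		for (bit = 0; bit < ENTRIES_PER_BYTE * BIT_SIZE; bit += BIT_SIZE) {
			if (((buf[byte] >> bit) & BIT_MASK) == old_state)
				return byte * ENTRIES_PER_BYTE + (bit >> 1);
		}
	}
	return NOENT;
}

int main(void)
{
	/* 16 bytes = 64 blocks, all marked "used" (0x55 = 01010101),
	 * then one block near the end is freed so the skip loop runs. */
	unsigned char bitmap[16];

	memset(bitmap, 0x55, sizeof(bitmap));
	bitmap[13] &= ~(BIT_MASK << 4);		/* block 54 becomes FREE */

	printf("first free block: %u\n",
	       toy_bitfit(bitmap, sizeof(bitmap), BLKST_FREE));
	printf("first used block: %u\n",
	       toy_bitfit(bitmap, sizeof(bitmap), BLKST_USED));
	return 0;
}

The patched gfs2_bitfit makes the same comparison with ((*plong) & LBITMASK) != lskipval, but it starts at the caller's goal block, walks any unaligned head a byte at a time before switching to whole words, and prefetches the next word while it scans.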