linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "George Spelvin" <linux@horizon.com>
To: bharrosh@panasas.com
Cc: linux@horizon.com, linux-arch@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: Big git diff speedup by avoiding x86 "fast string" memcmp
Date: 18 Dec 2010 17:54:36 -0500	[thread overview]
Message-ID: <20101218225436.28264.qmail@science.horizon.com> (raw)

> static inline int dentry_memcmp_long(const unsigned char *cs,
> 				const unsigned char *ct, ssize_t count)
> {
> 	int ret;
> 	const unsigned long *ls = (const unsigned long *)cs;
> 	const unsigned long *lt = (const unsigned long *)ct;
> 
> 	while (count > 8) {
> 		ret = (*cs != *ct);
> 		if (ret)
> 			break;
> 		cs++;
> 		ct++;
> 		count-=8;
> 	}
> 	if (count) {
> 		unsigned long t = *ct & ((0xffffffffffffffff >> ((8 - count) * 8))
> 		ret = (*cs != t)
> 	}
> 
> 	return ret;
> }

First, let's get the code right, and use correct types, but also, there
are some tricks to reduce the masking cost.

As long as you have to mask one string, *and* don't have to worry about
running off the end of mapped memory, there's no additional cost to
masking both in the loop.  Just test (a ^ b) & mask.

/*
 * dentry_memcmp_mask(count): mask selecting the first 'count' bytes, in
 * memory order, of an unsigned long.  'count' must be in the range
 * 1 .. sizeof(unsigned long); any other value indexes outside the table
 * (table variant) or shifts by an out-of-range amount (in-line variant).
 *
 * Two interchangeable implementations are provided: a table lookup and an
 * in-line shift.
 */
#if 1	/* Table lookup */

/*
 * Entry [n - 1] covers the first n bytes.  The word size is tested with
 * BITS_PER_LONG because the preprocessor cannot evaluate sizeof.
 * NOTE(review): assumes the kernel's BITS_PER_LONG is in scope here.
 * On 32-bit builds only the first four entries are initialised and used.
 */
static unsigned long const dentry_memcmp_mask[8] = {
#if defined(__LITTLE_ENDIAN)
	0xffUL, 0xffffUL, 0xffffffUL, 0xffffffffUL,
#if BITS_PER_LONG == 64
	0xffffffffffUL, 0xffffffffffffUL,
	0xffffffffffffffUL, 0xffffffffffffffffUL
#endif
#elif defined(__BIG_ENDIAN)
#if BITS_PER_LONG == 64
	0xff00000000000000UL, 0xffff000000000000UL,
	0xffffff0000000000UL, 0xffffffff00000000UL,
	0xffffffffff000000UL, 0xffffffffffff0000UL,
	0xffffffffffffff00UL, 0xffffffffffffffffUL
#else
	0xff000000UL, 0xffff0000UL, 0xffffff00UL, 0xffffffffUL
#endif
#else
#error undefined endianness
#endif
};

/*
 * Function-like macro sharing the array's name.  The array reference in
 * its expansion is not followed by '(' and so is not expanded again.
 */
#define dentry_memcmp_mask(count) dentry_memcmp_mask[(count)-1]

#else /* In-line code */

/* All-ones word shifted so that only the first 'count' bytes remain set. */
#if defined(__LITTLE_ENDIAN)
#define dentry_memcmp_mask(count) \
	(~0UL >> ((sizeof(unsigned long) - (count)) * 8))
#elif defined(__BIG_ENDIAN)
#define dentry_memcmp_mask(count) \
	(~0UL << ((sizeof(unsigned long) - (count)) * 8))
#else
#error undefined endianness
#endif

#endif /* Table lookup vs. in-line code */

/*
 * Compare the first 'count' bytes of cs and ct one unsigned long at a time.
 *
 * Returns true if the two byte ranges differ, false if they are equal.
 *
 * 'count' must be at least 1: the final step builds a mask for 1 up to
 * sizeof(unsigned long) bytes, and dentry_memcmp_mask(0) is not valid.
 *
 * The last word is always loaded in full and then masked, so up to
 * sizeof(unsigned long) - 1 bytes beyond 'count' are read from both
 * buffers.  NOTE(review): callers must guarantee that over-read is safe,
 * and that cs/ct may be dereferenced as unsigned long on this architecture
 * (alignment) - confirm against the call sites.
 */
static inline bool dentry_memcmp_long(unsigned char const *cs,
				unsigned char const *ct, ssize_t count)
{
	unsigned long const *ls = (unsigned long const *)cs;
	unsigned long const *lt = (unsigned long const *)ct;

	/* Whole words, while more than one word's worth of bytes remains. */
	while (count > (ssize_t)sizeof *ls) {
		if (*ls != *lt)
			return true;
		ls++;
		lt++;
		count -= sizeof *ls;
	}
	/* Last 1..sizeof(long) bytes: ignore anything past 'count'. */
	return ((*lt ^ *ls) & dentry_memcmp_mask(count)) != 0;
}

If your string is at least 8 bytes long, and the processor has fast unaligned
loads, you can skip the mask entirely by redundantly comparing some bytes
(although the code bloat is probably undesirable for inline code):

/*
 * Compare the first 'count' bytes of cs and ct one unsigned long at a time,
 * avoiding the tail mask for inputs of at least one word.
 *
 * Returns true if the two byte ranges differ, false if they are equal.
 *
 * 'count' must be at least 1.
 *
 * - count < sizeof(unsigned long): a single masked word compare.  This
 *   loads a full word from each buffer, i.e. reads past 'count' bytes.
 * - otherwise: whole words are compared, and the remaining 1..sizeof(long)
 *   bytes are covered by one final word load that ends exactly at byte
 *   'count'.  That load may overlap bytes already compared and is
 *   generally unaligned, but never reads past the end.
 *
 * NOTE(review): relies on the architecture tolerating unaligned unsigned
 * long loads, and on the short-input over-read being safe - confirm
 * against the call sites.
 */
static inline bool dentry_memcmp_long(const unsigned char *cs,
				const unsigned char *ct, ssize_t count)
{
	unsigned long const *ls = (unsigned long const *)cs;
	unsigned long const *lt = (unsigned long const *)ct;

	if (count < (ssize_t)sizeof *ls)
		return ((*lt ^ *ls) & dentry_memcmp_mask(count)) != 0;

	while (count > (ssize_t)sizeof *ls) {
		if (*ls != *lt)
			return true;
		ls++;
		lt++;
		count -= sizeof *ls;
	}
	/* Step back so the last word ends on the final byte to compare. */
	ls = (unsigned long const *)((unsigned char const *)ls + count - sizeof *ls);
	lt = (unsigned long const *)((unsigned char const *)lt + count - sizeof *lt);
	return *ls != *lt;
}

             reply	other threads:[~2010-12-18 22:54 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-18 22:54 George Spelvin [this message]
2010-12-19 14:28 ` Big git diff speedup by avoiding x86 "fast string" memcmp Boaz Harrosh
2010-12-19 15:46 ` Nick Piggin
2010-12-19 17:06   ` George Spelvin
2010-12-21  9:26     ` Nick Piggin
  -- strict thread matches above, loose matches on Subject: below --
2010-12-09  7:09 Nick Piggin
2010-12-09 13:37 ` Borislav Petkov
2010-12-10  2:38   ` Nick Piggin
2010-12-10  4:27 ` Nick Piggin
2010-12-10 14:23 ` J. R. Okajima
2010-12-13  1:45   ` Nick Piggin
2010-12-13  7:29     ` J. R. Okajima
2010-12-13  8:25       ` Nick Piggin
2010-12-14 19:01         ` J. R. Okajima
2010-12-15  4:06           ` Nick Piggin
2010-12-15  5:57             ` J. R. Okajima
2010-12-15 13:15             ` Boaz Harrosh
2010-12-15 18:00               ` David Miller
2010-12-16  9:53                 ` Boaz Harrosh
2010-12-16 13:13                   ` Nick Piggin
2010-12-16 14:03                     ` Boaz Harrosh
2010-12-16 14:15                       ` Nick Piggin
2010-12-16 16:51                   ` Linus Torvalds
2010-12-16 17:57                   ` David Miller
2010-12-15  4:38         ` Américo Wang
2010-12-15  5:54           ` Nick Piggin
2010-12-15  7:12             ` Linus Torvalds
2010-12-15 23:09 ` Tony Luck
2010-12-16  2:34   ` Nick Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101218225436.28264.qmail@science.horizon.com \
    --to=linux@horizon.com \
    --cc=bharrosh@panasas.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).