From: David Turner <dturner@twopensource.com>
To: git@vger.kernel.org, gitster@pobox.com, mhagger@alum.mit.edu,
neleai@seznam.cz
Cc: David Turner <dturner@twitter.com>
Subject: [PATCH v8] refs.c: SSE2 optimizations for check_refname_component
Date: Mon, 16 Jun 2014 15:22:46 -0400 [thread overview]
Message-ID: <1402946566-14923-1-git-send-email-dturner@twitter.com> (raw)
Optimize check_refname_component using SSE2 on x86_64.
git rev-parse HEAD is a good test-case for this, since it does almost
nothing except parse refs. For one particular repo with about 60k
refs, almost all packed, the timings are:
Look up table: 29 ms
SSE2: 23 ms
This cuts about 20% off of the runtime.
The configure.ac changes include code from the GNU C Library written
by Joseph S. Myers <joseph at codesourcery dot com>.
Ondřej Bílka <neleai@seznam.cz> suggested an SSE2 approach to the
substring searches, which netted a speed boost over the SSE4.2 code I
had initially written.
Signed-off-by: David Turner <dturner@twitter.com>
---
git-compat-util.h | 10 +++
refs.c | 244 +++++++++++++++++++++++++++++++++++++++++++++--------
t/t5511-refspec.sh | 19 +++++
3 files changed, 239 insertions(+), 34 deletions(-)
diff --git a/git-compat-util.h b/git-compat-util.h
index f6d3a46..291d46b 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -668,6 +668,16 @@ void git_qsort(void *base, size_t nmemb, size_t size,
#endif
#endif
+#if defined(__GNUC__) && defined(__x86_64__)
+#include <emmintrin.h>
+/* This is the system memory page size; it's used so that we can read
+ * outside the bounds of an allocation without segfaulting.
+ */
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+#endif
+
#ifdef UNRELIABLE_FSTAT
#define fstat_is_reliable() 0
#else
diff --git a/refs.c b/refs.c
index 46139d2..4c39af7 100644
--- a/refs.c
+++ b/refs.c
@@ -8,22 +8,43 @@
/*
* How to handle various characters in refnames:
* 0: An acceptable character for refs
- * 1: End-of-component
- * 2: ., look for a preceding . to reject .. in refs
- * 3: {, look for a preceding @ to reject @{ in refs
- * 4: A bad character: ASCII control characters, "~", "^", ":" or SP
+ * 1: \0: End-of-component and string
+ * 2: /: End-of-component
+ * 3: ., look for a preceding . to reject .. in refs
+ * 4: {, look for a preceding @ to reject @{ in refs
+ * 5: *, usually a bad character except, once as a wildcard
+ * 6: A bad character except * (see check_refname_component below)
*/
static unsigned char refname_disposition[256] = {
- 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4,
+ 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 3, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 0, 6, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 6, 6
};
+static int check_refname_component_trailer(const char *cp, const char *refname, int flags)
+{
+ if (cp == refname)
+ return 0; /* Component has zero length. */
+ if (refname[0] == '.') {
+ if (!(flags & REFNAME_DOT_COMPONENT))
+ return -1; /* Component starts with '.'. */
+ /*
+ * Even if leading dots are allowed, don't allow "."
+ * as a component (".." is prevented by a rule above).
+ */
+ if (refname[1] == '\0')
+ return -1; /* Component equals ".". */
+ }
+ if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
+ return -1; /* Refname ends with ".lock". */
+ return cp - refname;
+}
+
/*
* Try to read one refname component from the front of refname.
* Return the length of the component found, or -1 if the component is
@@ -33,8 +54,9 @@ static unsigned char refname_disposition[256] = {
* - any path component of it begins with ".", or
* - it has double dots "..", or
* - it has ASCII control character, "~", "^", ":" or SP, anywhere, or
- * - it ends with a "/".
- * - it ends with ".lock"
+ * - it has pattern-matching notation "*", "?", "[", anywhere, or
+ * - it ends with a "/", or
+ * - it ends with ".lock", or
* - it contains a "\" (backslash)
*/
static int check_refname_component(const char *refname, int flags)
@@ -46,47 +68,32 @@ static int check_refname_component(const char *refname, int flags)
int ch = *cp & 255;
unsigned char disp = refname_disposition[ch];
switch(disp) {
- case 1:
- goto out;
+ case 1: /* fall-through */
case 2:
+ return check_refname_component_trailer(cp, refname, flags);
+ case 3:
if (last == '.')
return -1; /* Refname contains "..". */
break;
- case 3:
+ case 4:
if (last == '@')
return -1; /* Refname contains "@{". */
break;
- case 4:
+ case 5: /* fall-through */
+ case 6:
return -1;
}
last = ch;
}
-out:
- if (cp == refname)
- return 0; /* Component has zero length. */
- if (refname[0] == '.') {
- if (!(flags & REFNAME_DOT_COMPONENT))
- return -1; /* Component starts with '.'. */
- /*
- * Even if leading dots are allowed, don't allow "."
- * as a component (".." is prevented by a rule above).
- */
- if (refname[1] == '\0')
- return -1; /* Component equals ".". */
- }
- if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
- return -1; /* Refname ends with ".lock". */
- return cp - refname;
}
-int check_refname_format(const char *refname, int flags)
+static int check_refname_format_bytewise (const char *refname, int flags)
{
int component_len, component_count = 0;
if (!strcmp(refname, "@"))
/* Refname is a single character '@'. */
return -1;
-
while (1) {
/* We are at the start of a path component. */
component_len = check_refname_component(refname, flags);
@@ -115,6 +122,175 @@ int check_refname_format(const char *refname, int flags)
return 0;
}
+#if defined(__GNUC__) && defined(__x86_64__)
+#define SSE_VECTOR_BYTES 16
+
+/* Vectorized version of check_refname_format. */
+int check_refname_format(const char *refname, int flags)
+{
+ const char *cp = refname;
+
+ const __m128i dot = _mm_set1_epi8 ('.');
+ const __m128i at = _mm_set1_epi8 ('@');
+ const __m128i curly = _mm_set1_epi8 ('{');
+ const __m128i slash = _mm_set1_epi8 ('/');
+ const __m128i zero = _mm_set1_epi8 ('\000');
+
+ /* below '*', all characters are forbidden or rare */
+ const __m128i star_ub = _mm_set1_epi8 ('*' + 1);
+
+ const __m128i colon = _mm_set1_epi8 (':');
+ const __m128i question = _mm_set1_epi8 ('?');
+
+ /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */
+ const __m128i bracket_lb = _mm_set1_epi8 ('[' - 1);
+ const __m128i caret_ub = _mm_set1_epi8 ('^' + 1);
+
+ /* '~' and above are forbidden */
+ const __m128i tilde_lb = _mm_set1_epi8 ('~' - 1);
+
+ if (refname[0] == 0 || refname[0] == '/') {
+ /* entirely empty ref or initial ref component */
+ return -1;
+ }
+
+ /* Initial ref component of '.'; below we look for /. so we'll
+ * miss this. */
+ if (refname[0] == '.') {
+ if (refname[1] == '/' || refname[1] == '\0')
+ return -1;
+ if (!(flags & REFNAME_DOT_COMPONENT))
+ return -1;
+ }
+ while(1) {
+ __m128i tmp, tmp1, result;
+ uint64_t mask;
+
+ if ((uintptr_t) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1)
+ /* End-of-page; fall back to slow method for
+ * this entire ref. */
+ return check_refname_format_bytewise(refname, flags);
+
+ tmp = _mm_loadu_si128((__m128i *)cp);
+ tmp1 = _mm_loadu_si128((__m128i *)(cp + 1));
+
+ /*
+ * This range (note the lt) contains some
+ * permissible-but-rare characters (including all
+ * characters >= 128), which we handle later. It also
+ * includes \000.
+ */
+ result = _mm_cmplt_epi8(tmp, star_ub);
+
+ result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, question));
+ result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, colon));
+
+ /* This range contains the permissible ] as bycatch */
+ result = _mm_or_si128(result, _mm_and_si128 (
+ _mm_cmpgt_epi8(tmp, bracket_lb),
+ _mm_cmplt_epi8(tmp, caret_ub)));
+
+ result = _mm_or_si128(result, _mm_cmpgt_epi8(tmp, tilde_lb));
+
+ /* .. */
+ result = _mm_or_si128(result, _mm_and_si128 (
+ _mm_cmpeq_epi8(tmp, dot),
+ _mm_cmpeq_epi8(tmp1, dot)));
+ /* @{ */
+ result = _mm_or_si128(result, _mm_and_si128 (
+ _mm_cmpeq_epi8(tmp, at),
+ _mm_cmpeq_epi8(tmp1, curly)));
+ /* // */
+ result = _mm_or_si128(result, _mm_and_si128 (
+ _mm_cmpeq_epi8(tmp, slash),
+ _mm_cmpeq_epi8(tmp1, slash)));
+ /* trailing / */
+ result = _mm_or_si128(result, _mm_and_si128 (
+ _mm_cmpeq_epi8(tmp, slash),
+ _mm_cmpeq_epi8(tmp1, zero)));
+ /*
+ * Even though /. is not necessarily an error, we flag
+ * it anyway. If we find it, we'll check if it's valid
+ * and if so we'll advance just past it.
+ */
+ result = _mm_or_si128(result, _mm_and_si128 (
+ _mm_cmpeq_epi8(tmp, slash),
+ _mm_cmpeq_epi8(tmp1, dot)));
+
+ mask = _mm_movemask_epi8(result);
+ if (mask) {
+ /* We've found either end-of-string, or some
+ probably-bad character or substring. */
+ int i = __builtin_ctz(mask);
+ switch (refname_disposition[cp[i] & 255]) {
+ case 0:
+ /*
+ * bycatch: a good character that's in
+ * one of the ranges of mostly-forbidden
+ * characters
+ */
+ cp += i + 1;
+ continue;
+ case 1:
+ cp += i;
+ goto success;
+ case 2:
+ /*
+ * Even if leading dots are allowed, don't
+ * allow "." as a component (".." is
+ * prevented by case 3 below).
+ */
+ if (cp[i + 1] == '.') {
+ if (cp[i + 2] == '\0')
+ return -1;
+ if (flags & REFNAME_DOT_COMPONENT) {
+ /* skip to just after the /. */
+ cp += i + 2;
+ continue;
+ }
+ return -1;
+ } else if (cp[i + 1] == '/' || cp[i + 1] == '\0')
+ return -1;
+ break;
+ case 3:
+ if (cp[i + 1] == '.' || cp[i + 1] == '\0')
+ return -1;
+ break;
+ case 4:
+ if (cp[i + 1] == '{')
+ return -1;
+ break;
+ case 5:
+ if (((cp == refname + i) || cp[i - 1] == '/')
+ && (cp[i + 1] == '/' || cp[i + 1] == 0))
+ if (flags & REFNAME_REFSPEC_PATTERN) {
+ flags &= ~REFNAME_REFSPEC_PATTERN;
+ /* restart after the * */
+ cp += i + 1;
+ continue;
+ }
+ /* fall-through */
+ case 6:
+ return -1;
+ }
+ } else
+ cp += SSE_VECTOR_BYTES;
+ }
+success:
+ if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
+ return -1; /* Refname ends with ".lock". */
+ return 0;
+}
+
+#else
+
+int check_refname_format (const char *refname, int flags)
+{
+ return check_refname_format_bytewise(refname, flags);
+}
+
+#endif
+
struct ref_entry;
/*
diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh
index de6db86..ef0f095 100755
--- a/t/t5511-refspec.sh
+++ b/t/t5511-refspec.sh
@@ -29,6 +29,10 @@ test_refspec push '+:'
test_refspec fetch ''
test_refspec fetch ':'
test_refspec fetch '::' invalid
+test_refspec fetch '.' invalid
+test_refspec fetch './frotz' invalid
+test_refspec fetch 'refs/.' invalid
+test_refspec push '*//:refs/remotes/frotz/*' invalid
test_refspec push 'refs/heads/*:refs/remotes/frotz/*'
test_refspec push 'refs/heads/*:refs/remotes/frotz' invalid
@@ -71,6 +75,8 @@ test_refspec fetch ':refs/remotes/frotz/HEAD-to-me'
test_refspec push ':refs/remotes/frotz/delete me' invalid
test_refspec fetch ':refs/remotes/frotz/HEAD to me' invalid
+test_refspec fetch ':refs/remotes/frotz/something.lock' invalid
+
test_refspec fetch 'refs/heads/*/for-linus:refs/remotes/mine/*-blah' invalid
test_refspec push 'refs/heads/*/for-linus:refs/remotes/mine/*-blah' invalid
@@ -88,4 +94,17 @@ test_refspec fetch "refs/heads/${good}"
bad=$(printf '\011tab')
test_refspec fetch "refs/heads/${bad}" invalid
+test_refspec fetch 'refs/heads/a-very-long-refname'
+test_refspec fetch 'refs/heads/.a-very-long-refname' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123..' invalid
+test_refspec fetch 'refs/heads/abcdefgh01234..' invalid
+test_refspec fetch 'refs/heads/abcdefgh012345..' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123456..' invalid
+test_refspec fetch 'refs/heads/abcdefgh01234567..' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123.a'
+test_refspec fetch 'refs/heads/abcdefgh01234.a'
+test_refspec fetch 'refs/heads/abcdefgh012345.a'
+test_refspec fetch 'refs/heads/abcdefgh0123456.a'
+test_refspec fetch 'refs/heads/abcdefgh01234567.a'
+
test_done
--
2.0.0.rc1.24.g0588c94.dirty
next reply other threads:[~2014-06-16 19:23 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-06-16 19:22 David Turner [this message]
2014-06-17 0:06 ` [PATCH v8] refs.c: SSE2 optimizations for check_refname_component Junio C Hamano
2014-06-17 0:51 ` David Turner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1402946566-14923-1-git-send-email-dturner@twitter.com \
--to=dturner@twopensource.com \
--cc=dturner@twitter.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=mhagger@alum.mit.edu \
--cc=neleai@seznam.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.