git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5 1/2] refs.c: optimize check_refname_component()
@ 2014-06-02  6:20 David Turner
  2014-06-03 18:08 ` Junio C Hamano
  0 siblings, 1 reply; 4+ messages in thread
From: David Turner @ 2014-06-02  6:20 UTC (permalink / raw)
  To: Michael Haggerty, Junio C Hamano, git; +Cc: David Turner

In a repository with many refs, check_refname_component can be a major
contributor to the runtime of some git commands. One such command is

git rev-parse HEAD

Timings for one particular repo, with about 60k refs, almost all
packed, are:

Old: 35 ms
New: 29 ms

Many other commands which read refs are also sped up.

Signed-off-by: David Turner <dturner@twitter.com>
---
 refs.c             | 67 +++++++++++++++++++++++++++++++-----------------------
 t/t5511-refspec.sh |  6 ++++-
 2 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/refs.c b/refs.c
index 28d5eca..dd28f2a 100644
--- a/refs.c
+++ b/refs.c
@@ -6,8 +6,29 @@
 #include "string-list.h"
 
 /*
- * Make sure "ref" is something reasonable to have under ".git/refs/";
- * We do not like it if:
+ * How to handle various characters in refnames:
+ * 0: An acceptable character for refs
+ * 1: End-of-component
+ * 2: ., look for a preceding . to reject .. in refs
+ * 3: {, look for a preceding @ to reject @{ in refs
+ * 4: A bad character: ASCII control characters, "~", "^", ":" or SP
+ */
+static unsigned char refname_disposition[256] = {
+	1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+	4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4
+};
+
+/*
+ * Try to read one refname component from the front of refname.
+ * Return the length of the component found, or -1 if the component is
+ * not legal.  It is legal if it is something reasonable to have under
+ * ".git/refs/"; We do not like it if:
  *
  * - any path component of it begins with ".", or
  * - it has double dots "..", or
@@ -16,41 +37,31 @@
  * - it ends with ".lock"
  * - it contains a "\" (backslash)
  */
-
-/* Return true iff ch is not allowed in reference names. */
-static inline int bad_ref_char(int ch)
-{
-	if (((unsigned) ch) <= ' ' || ch == 0x7f ||
-	    ch == '~' || ch == '^' || ch == ':' || ch == '\\')
-		return 1;
-	/* 2.13 Pattern Matching Notation */
-	if (ch == '*' || ch == '?' || ch == '[') /* Unsupported */
-		return 1;
-	return 0;
-}
-
-/*
- * Try to read one refname component from the front of refname.  Return
- * the length of the component found, or -1 if the component is not
- * legal.
- */
 static int check_refname_component(const char *refname, int flags)
 {
 	const char *cp;
 	char last = '\0';
 
 	for (cp = refname; ; cp++) {
-		char ch = *cp;
-		if (ch == '\0' || ch == '/')
+		unsigned char ch = (unsigned char) *cp;
+		unsigned char disp = refname_disposition[ch];
+		switch(disp) {
+		case 1:
+			goto out;
+		case 2:
+			if (last == '.')
+				return -1; /* Refname contains "..". */
+			break;
+		case 3:
+			if (last == '@')
+				return -1; /* Refname contains "@{". */
 			break;
-		if (bad_ref_char(ch))
-			return -1; /* Illegal character in refname. */
-		if (last == '.' && ch == '.')
-			return -1; /* Refname contains "..". */
-		if (last == '@' && ch == '{')
-			return -1; /* Refname contains "@{". */
+		case 4:
+			return -1;
+		}
 		last = ch;
 	}
+out:
 	if (cp == refname)
 		return 0; /* Component has zero length. */
 	if (refname[0] == '.') {
diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh
index c289322..1571176 100755
--- a/t/t5511-refspec.sh
+++ b/t/t5511-refspec.sh
@@ -5,7 +5,6 @@ test_description='refspec parsing'
 . ./test-lib.sh
 
 test_refspec () {
-
 	kind=$1 refspec=$2 expect=$3
 	git config remote.frotz.url "." &&
 	git config --remove-section remote.frotz &&
@@ -84,4 +83,9 @@ test_refspec push 'refs/heads/*/*/for-linus:refs/remotes/mine/*' invalid
 test_refspec fetch 'refs/heads/*/for-linus:refs/remotes/mine/*'
 test_refspec push 'refs/heads/*/for-linus:refs/remotes/mine/*'
 
+good=$(echo -n '\0377')
+test_refspec fetch "refs/heads/${good}"
+bad=$(echo -n '\011')
+test_refspec fetch "refs/heads/${bad}"				invalid
+
 test_done
-- 
2.0.0.rc1.18.gf763c0f

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v5 1/2] refs.c: optimize check_refname_component()
  2014-06-02  6:20 [PATCH v5 1/2] refs.c: optimize check_refname_component() David Turner
@ 2014-06-03 18:08 ` Junio C Hamano
  0 siblings, 0 replies; 4+ messages in thread
From: Junio C Hamano @ 2014-06-03 18:08 UTC (permalink / raw)
  To: David Turner; +Cc: Michael Haggerty, git, David Turner

David Turner <dturner@twopensource.com> writes:

>  static int check_refname_component(const char *refname, int flags)
>  {
>  	const char *cp;
>  	char last = '\0';
>  
>  	for (cp = refname; ; cp++) {
> -		char ch = *cp;
> -		if (ch == '\0' || ch == '/')
> +		unsigned char ch = (unsigned char) *cp;

Hmph, this cast bothers me.  I am fine with either of these two, though.

	int ch = *cp & 0377;
        unsigned char ch = *((unsigned char *)cp);

> +		unsigned char disp = refname_disposition[ch];
> +		switch(disp) {
> +		case 1:
> +			goto out;
> +		case 2:
> +			if (last == '.')
> +				return -1; /* Refname contains "..". */
> +			break;
> +		case 3:
> +			if (last == '@')
> +				return -1; /* Refname contains "@{". */
>  			break;
> -		if (bad_ref_char(ch))
> -			return -1; /* Illegal character in refname. */
> -		if (last == '.' && ch == '.')
> -			return -1; /* Refname contains "..". */
> -		if (last == '@' && ch == '{')
> -			return -1; /* Refname contains "@{". */
> +		case 4:
> +			return -1;
> +		}
>  		last = ch;
>  	}
> +out:
>  	if (cp == refname)
>  		return 0; /* Component has zero length. */
>  	if (refname[0] == '.') {
> diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh
> index c289322..1571176 100755
> --- a/t/t5511-refspec.sh
> +++ b/t/t5511-refspec.sh
> @@ -5,7 +5,6 @@ test_description='refspec parsing'
>  . ./test-lib.sh
>  
>  test_refspec () {
> -
>  	kind=$1 refspec=$2 expect=$3
>  	git config remote.frotz.url "." &&
>  	git config --remove-section remote.frotz &&
> @@ -84,4 +83,9 @@ test_refspec push 'refs/heads/*/*/for-linus:refs/remotes/mine/*' invalid
>  test_refspec fetch 'refs/heads/*/for-linus:refs/remotes/mine/*'
>  test_refspec push 'refs/heads/*/for-linus:refs/remotes/mine/*'
>  
> +good=$(echo -n '\0377')

I think we avoid "echo -n" and use "printf" to be portable across
different echo implementations.

Use of \0377, which most likely to be just half-a-character, does
not feel a particularly good example, by the way.

> +test_refspec fetch "refs/heads/${good}"
> +bad=$(echo -n '\011')

Likewise.

> +test_refspec fetch "refs/heads/${bad}"				invalid
> +
>  test_done

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v5 1/2] refs.c: optimize check_refname_component()
@ 2014-06-03 18:21 David Turner
  2014-06-03 18:21 ` [PATCH v5 2/2] refs.c: SSE4.2 optimizations for check_refname_component David Turner
  0 siblings, 1 reply; 4+ messages in thread
From: David Turner @ 2014-06-03 18:21 UTC (permalink / raw)
  To: gitster, git, mhagger; +Cc: David Turner

In a repository with many refs, check_refname_component can be a major
contributor to the runtime of some git commands. One such command is

git rev-parse HEAD

Timings for one particular repo, with about 60k refs, almost all
packed, are:

Old: 35 ms
New: 29 ms

Many other commands which read refs are also sped up.

Signed-off-by: David Turner <dturner@twitter.com>
---
 refs.c             | 67 +++++++++++++++++++++++++++++++-----------------------
 t/t5511-refspec.sh |  6 ++++-
 2 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/refs.c b/refs.c
index 28d5eca..46139d2 100644
--- a/refs.c
+++ b/refs.c
@@ -6,8 +6,29 @@
 #include "string-list.h"
 
 /*
- * Make sure "ref" is something reasonable to have under ".git/refs/";
- * We do not like it if:
+ * How to handle various characters in refnames:
+ * 0: An acceptable character for refs
+ * 1: End-of-component
+ * 2: ., look for a preceding . to reject .. in refs
+ * 3: {, look for a preceding @ to reject @{ in refs
+ * 4: A bad character: ASCII control characters, "~", "^", ":" or SP
+ */
+static unsigned char refname_disposition[256] = {
+	1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+	4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4
+};
+
+/*
+ * Try to read one refname component from the front of refname.
+ * Return the length of the component found, or -1 if the component is
+ * not legal.  It is legal if it is something reasonable to have under
+ * ".git/refs/"; We do not like it if:
  *
  * - any path component of it begins with ".", or
  * - it has double dots "..", or
@@ -16,41 +37,31 @@
  * - it ends with ".lock"
  * - it contains a "\" (backslash)
  */
-
-/* Return true iff ch is not allowed in reference names. */
-static inline int bad_ref_char(int ch)
-{
-	if (((unsigned) ch) <= ' ' || ch == 0x7f ||
-	    ch == '~' || ch == '^' || ch == ':' || ch == '\\')
-		return 1;
-	/* 2.13 Pattern Matching Notation */
-	if (ch == '*' || ch == '?' || ch == '[') /* Unsupported */
-		return 1;
-	return 0;
-}
-
-/*
- * Try to read one refname component from the front of refname.  Return
- * the length of the component found, or -1 if the component is not
- * legal.
- */
 static int check_refname_component(const char *refname, int flags)
 {
 	const char *cp;
 	char last = '\0';
 
 	for (cp = refname; ; cp++) {
-		char ch = *cp;
-		if (ch == '\0' || ch == '/')
+		int ch = *cp & 255;
+		unsigned char disp = refname_disposition[ch];
+		switch(disp) {
+		case 1:
+			goto out;
+		case 2:
+			if (last == '.')
+				return -1; /* Refname contains "..". */
+			break;
+		case 3:
+			if (last == '@')
+				return -1; /* Refname contains "@{". */
 			break;
-		if (bad_ref_char(ch))
-			return -1; /* Illegal character in refname. */
-		if (last == '.' && ch == '.')
-			return -1; /* Refname contains "..". */
-		if (last == '@' && ch == '{')
-			return -1; /* Refname contains "@{". */
+		case 4:
+			return -1;
+		}
 		last = ch;
 	}
+out:
 	if (cp == refname)
 		return 0; /* Component has zero length. */
 	if (refname[0] == '.') {
diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh
index c289322..de6db86 100755
--- a/t/t5511-refspec.sh
+++ b/t/t5511-refspec.sh
@@ -5,7 +5,6 @@ test_description='refspec parsing'
 . ./test-lib.sh
 
 test_refspec () {
-
 	kind=$1 refspec=$2 expect=$3
 	git config remote.frotz.url "." &&
 	git config --remove-section remote.frotz &&
@@ -84,4 +83,9 @@ test_refspec push 'refs/heads/*/*/for-linus:refs/remotes/mine/*' invalid
 test_refspec fetch 'refs/heads/*/for-linus:refs/remotes/mine/*'
 test_refspec push 'refs/heads/*/for-linus:refs/remotes/mine/*'
 
+good=$(printf '\303\204')
+test_refspec fetch "refs/heads/${good}"
+bad=$(printf '\011tab')
+test_refspec fetch "refs/heads/${bad}"				invalid
+
 test_done
-- 
2.0.0.rc1.18.gf763c0f

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v5 2/2] refs.c: SSE4.2 optimizations for check_refname_component
  2014-06-03 18:21 [PATCH v5 1/2] refs.c: optimize check_refname_component() David Turner
@ 2014-06-03 18:21 ` David Turner
  0 siblings, 0 replies; 4+ messages in thread
From: David Turner @ 2014-06-03 18:21 UTC (permalink / raw)
  To: gitster, git, mhagger; +Cc: David Turner

Optimize check_refname_component using SSE4.2, where available.

git rev-parse HEAD is a good test-case for this, since it does almost
nothing except parse refs.  For one particular repo with about 60k
refs, almost all packed, the timings are:

Look up table: 29 ms
SSE4.2:        25 ms

This is about a 15% improvement.

The configure.ac changes include code from the GNU C Library written
by Joseph S. Myers <joseph at codesourcery dot com>.

Signed-off-by: David Turner <dturner@twitter.com>
---
 Makefile           |   6 +++
 aclocal.m4         |   6 +++
 configure.ac       |  17 ++++++++
 git-compat-util.h  |  20 +++++++++
 refs.c             | 116 +++++++++++++++++++++++++++++++++++++++++++++--------
 t/t5511-refspec.sh |  13 ++++++
 6 files changed, 161 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile
index a53f3a8..dd2127a 100644
--- a/Makefile
+++ b/Makefile
@@ -1326,6 +1326,11 @@ else
 		COMPAT_OBJS += compat/win32mmap.o
 	endif
 endif
+ifdef NO_SSE42
+	BASIC_CFLAGS += -DNO_SSE42
+else
+	BASIC_CFLAGS += -msse4.2
+endif
 ifdef OBJECT_CREATION_USES_RENAMES
 	COMPAT_CFLAGS += -DOBJECT_CREATION_MODE=1
 endif
@@ -2199,6 +2204,7 @@ GIT-BUILD-OPTIONS: FORCE
 	@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
 	@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@
 	@echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@
+	@echo NO_SSE42=\''$(subst ','\'',$(subst ','\'',$(NO_SSE42)))'\' >>$@
 ifdef TEST_OUTPUT_DIRECTORY
 	@echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@
 endif
diff --git a/aclocal.m4 b/aclocal.m4
index f11bc7e..d9f3f19 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -38,3 +38,9 @@ AC_DEFUN([TYPE_SOCKLEN_T],
       [#include <sys/types.h>
 #include <sys/socket.h>])
 ])
+
+dnl Test a compiler option or options with an empty input file.
+dnl LIBC_TRY_CC_OPTION([options], [action-if-true], [action-if-false])
+AC_DEFUN([LIBC_TRY_CC_OPTION],
+[AS_IF([AC_TRY_COMMAND([${CC-cc} $1 -xc /dev/null -S -o /dev/null])],
+	[$2], [$3])])
diff --git a/configure.ac b/configure.ac
index b711254..3a5bda9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -382,6 +382,23 @@ AS_HELP_STRING([],[Tcl/Tk interpreter will be found in a system.]),
 GIT_PARSE_WITH(tcltk))
 #
 
+# Declare the with-sse42/without-sse42 options.
+AC_ARG_WITH(sse42,
+AS_HELP_STRING([--with-sse42],[use SSE4.2 instructions])
+AS_HELP_STRING([],[(default is YES if your compiler supports -msse4.2)]),
+GIT_PARSE_WITH(sse42))
+
+if test "$NO_SSE42" != "YesPlease"; then
+   dnl Check if -msse4.2 works.
+   AC_CACHE_CHECK(for SSE4.2 support, cc_cv_sse42, [dnl
+   LIBC_TRY_CC_OPTION([-msse4.2], [cc_cv_sse42=yes], [cc_cv_sse42=no])
+   ])
+   if test $cc_cv_sse42 = no; then
+     NO_SSE42=1
+   fi
+fi
+
+GIT_CONF_SUBST([NO_SSE42])
 
 ## Checks for programs.
 AC_MSG_NOTICE([CHECKS for programs])
diff --git a/git-compat-util.h b/git-compat-util.h
index f6d3a46..254487a 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -668,6 +668,26 @@ void git_qsort(void *base, size_t nmemb, size_t size,
 #endif
 #endif
 
+#ifndef NO_SSE42
+#include <nmmintrin.h>
+/* Clang ships with a version of nmmintrin.h that's incomplete; if
+ * necessary, we define the constants that we're going to use. */
+#ifndef _SIDD_UBYTE_OPS
+#define _SIDD_UBYTE_OPS                 0x00
+#define _SIDD_CMP_EQUAL_ANY             0x00
+#define _SIDD_CMP_RANGES                0x04
+#define _SIDD_CMP_EQUAL_ORDERED         0x0c
+#define _SIDD_NEGATIVE_POLARITY         0x10
+#endif
+
+/* This is the system memory page size; it's used so that we can read
+ * outside the bounds of an allocation without segfaulting. It is
+ * assumed to be a power of 2. */
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+#endif
+
 #ifdef UNRELIABLE_FSTAT
 #define fstat_is_reliable() 0
 #else
diff --git a/refs.c b/refs.c
index 46139d2..532aaf4 100644
--- a/refs.c
+++ b/refs.c
@@ -24,6 +24,25 @@ static unsigned char refname_disposition[256] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4
 };
 
+static int check_refname_component_trailer(const char *cp, const char *refname, int flags)
+{
+	if (cp == refname)
+		return 0; /* Component has zero length. */
+	if (refname[0] == '.') {
+		if (!(flags & REFNAME_DOT_COMPONENT))
+			return -1; /* Component starts with '.'. */
+		/*
+		 * Even if leading dots are allowed, don't allow "."
+		 * as a component (".." is prevented by a rule above).
+		 */
+		if (refname[1] == '\0')
+			return -1; /* Component equals ".". */
+	}
+	if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
+		return -1; /* Refname ends with ".lock". */
+	return cp - refname;
+}
+
 /*
  * Try to read one refname component from the front of refname.
  * Return the length of the component found, or -1 if the component is
@@ -37,7 +56,7 @@ static unsigned char refname_disposition[256] = {
  * - it ends with ".lock"
  * - it contains a "\" (backslash)
  */
-static int check_refname_component(const char *refname, int flags)
+static int check_refname_component_1(const char *refname, int flags)
 {
 	const char *cp;
 	char last = '\0';
@@ -47,7 +66,7 @@ static int check_refname_component(const char *refname, int flags)
 		unsigned char disp = refname_disposition[ch];
 		switch(disp) {
 		case 1:
-			goto out;
+			return check_refname_component_trailer(cp, refname, flags);
 		case 2:
 			if (last == '.')
 				return -1; /* Refname contains "..". */
@@ -61,23 +80,86 @@ static int check_refname_component(const char *refname, int flags)
 		}
 		last = ch;
 	}
-out:
-	if (cp == refname)
-		return 0; /* Component has zero length. */
-	if (refname[0] == '.') {
-		if (!(flags & REFNAME_DOT_COMPONENT))
-			return -1; /* Component starts with '.'. */
-		/*
-		 * Even if leading dots are allowed, don't allow "."
-		 * as a component (".." is prevented by a rule above).
-		 */
-		if (refname[1] == '\0')
-			return -1; /* Component equals ".". */
+}
+
+#ifdef NO_SSE42
+#define check_refname_component check_refname_component_1
+#else
+#define BLOCK_SIZE 16
+
+/* Vectorized version of check_refname_component */
+static int check_refname_component(const char *refname, int flags)
+{
+	const __m128i *refname_vec = (__m128i*) refname;
+
+	/* Character ranges for characters forbidden in refs; see
+	 * above */
+	static const __v16qi bad = {
+		0x01, 0x20,  0x7e, 0x7f,  0x5e, 0x5e,  0x3a, 0x3a,
+		0x5b, 0x5c,  0x2a, 0x2a,  0x3f, 0x3f,  0x3f, 0x3f};
+
+	static const __v16qi nonslashes = {
+		'\001', '/' -1, '/' + 1, 0xff,
+	};
+
+	static const __v16qi dotdot = {'.','.',0};
+	static const __v16qi atcurly = {'@','{',0};
+
+	const __m128i *vp;
+	const char *cp = (const char *)refname_vec;
+
+	int dotdotpos = BLOCK_SIZE, atcurlypos = BLOCK_SIZE;
+	for (vp = refname_vec; ; vp++) {
+		__m128i tmp;
+		int endpos;
+
+		/* Handle case of forbidden substrings .. and @{ crossing
+		 * sixteen-byte boundaries */
+		if (dotdotpos == 15 && *cp == '.')
+			return -1;
+
+		if (atcurlypos == 15 && *cp == '{')
+			return -1;
+
+		if (((uintptr_t) vp & (PAGE_SIZE - 1)) > PAGE_SIZE - BLOCK_SIZE)
+			/* End-of-page; fall back to slow method for
+			 * this entire component. */
+			return check_refname_component_1(refname, flags);
+
+		tmp = _mm_lddqu_si128(vp);
+
+		/* Find slashes or end-of-string. The double-negative
+		 * (negative-polarity search for non-slashes) is
+		 * necessary so that \0 will also be counted.  */
+		endpos = _mm_cmpistri((__m128i) nonslashes, tmp,
+				      _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |
+				      _SIDD_NEGATIVE_POLARITY);
+
+		if (_mm_cmpestrc((__m128i) bad, BLOCK_SIZE, tmp, endpos,
+				 _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES))
+			return -1;
+
+		dotdotpos = _mm_cmpestri((__m128i) dotdot, 2, tmp, endpos,
+					 _SIDD_UBYTE_OPS |
+					 _SIDD_CMP_EQUAL_ORDERED);
+		if (dotdotpos < 15)
+			return -1;
+
+		atcurlypos = _mm_cmpestri((__m128i) atcurly, 2, tmp, endpos,
+					  _SIDD_UBYTE_OPS |
+					  _SIDD_CMP_EQUAL_ORDERED);
+		if (atcurlypos < 15)
+			return -1;
+
+		if (endpos < BLOCK_SIZE) {
+			cp = ((const char*) vp) + endpos;
+			break;
+		}
+		cp = (const char*) vp + BLOCK_SIZE;
 	}
-	if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
-		return -1; /* Refname ends with ".lock". */
-	return cp - refname;
+	return check_refname_component_trailer(cp, refname, flags);
 }
+#endif
 
 int check_refname_format(const char *refname, int flags)
 {
diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh
index de6db86..7f1bd74 100755
--- a/t/t5511-refspec.sh
+++ b/t/t5511-refspec.sh
@@ -88,4 +88,17 @@ test_refspec fetch "refs/heads/${good}"
 bad=$(printf '\011tab')
 test_refspec fetch "refs/heads/${bad}"				invalid
 
+test_refspec fetch 'refs/heads/a-very-long-refname'
+test_refspec fetch 'refs/heads/.a-very-long-refname'		invalid
+test_refspec fetch 'refs/heads/abcdefgh0123..'			invalid
+test_refspec fetch 'refs/heads/abcdefgh01234..'			invalid
+test_refspec fetch 'refs/heads/abcdefgh012345..'		invalid
+test_refspec fetch 'refs/heads/abcdefgh0123456..'		invalid
+test_refspec fetch 'refs/heads/abcdefgh01234567..'		invalid
+test_refspec fetch 'refs/heads/abcdefgh0123.a'
+test_refspec fetch 'refs/heads/abcdefgh01234.a'
+test_refspec fetch 'refs/heads/abcdefgh012345.a'
+test_refspec fetch 'refs/heads/abcdefgh0123456.a'
+test_refspec fetch 'refs/heads/abcdefgh01234567.a'
+
 test_done
-- 
2.0.0.rc1.18.gf763c0f

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-06-03 18:22 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-06-03 18:21 [PATCH v5 1/2] refs.c: optimize check_refname_component() David Turner
2014-06-03 18:21 ` [PATCH v5 2/2] refs.c: SSE4.2 optimizations for check_refname_component David Turner
  -- strict thread matches above, loose matches on Subject: below --
2014-06-02  6:20 [PATCH v5 1/2] refs.c: optimize check_refname_component() David Turner
2014-06-03 18:08 ` Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).