* [PATCH] check_refname_component: Optimize
@ 2014-05-28 19:57 David Turner
2014-05-28 21:24 ` Junio C Hamano
2014-05-29 12:19 ` brian m. carlson
0 siblings, 2 replies; 17+ messages in thread
From: David Turner @ 2014-05-28 19:57 UTC (permalink / raw)
To: git, mhagger, gitster; +Cc: David Turner
In a repository with tens of thousands of refs, the command
~/git/git-diff-index --cached --quiet --ignore-submodules [revision]
is a bit slow. check_refname_component is a major contributor to the
runtime. Provide two optimized versions of this function: one for
machines with SSE4.2, and one for machines without. The speedup for
this command on one particular repo (with about 60k refs) is about 12%
for the SSE version and 8% for the non-SSE version.
Signed-off-by: David Turner <dturner@twitter.com>
---
| 6 +++
| 6 +++
| 143 +++++++++++++++++++++++++++++++++++++++++++++++++++--
| 13 +++++
4 files changed, 163 insertions(+), 5 deletions(-)
--git a/Makefile b/Makefile
index a53f3a8..123e2fc 100644
--- a/Makefile
+++ b/Makefile
@@ -1326,6 +1326,11 @@ else
COMPAT_OBJS += compat/win32mmap.o
endif
endif
+ifdef NO_SSE
+ BASIC_CFLAGS += -DNO_SSE
+else
+ BASIC_CFLAGS += -msse4
+endif
ifdef OBJECT_CREATION_USES_RENAMES
COMPAT_CFLAGS += -DOBJECT_CREATION_MODE=1
endif
@@ -2199,6 +2204,7 @@ GIT-BUILD-OPTIONS: FORCE
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@
@echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@
+ @echo NO_SSE=\''$(subst ','\'',$(subst ','\'',$(NO_SSE)))'\' >>$@
ifdef TEST_OUTPUT_DIRECTORY
@echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@
endif
--git a/configure.ac b/configure.ac
index b711254..71a9429 100644
--- a/configure.ac
+++ b/configure.ac
@@ -382,6 +382,11 @@ AS_HELP_STRING([],[Tcl/Tk interpreter will be found in a system.]),
GIT_PARSE_WITH(tcltk))
#
+# Declare the with-sse/without-sse options.
+AC_ARG_WITH(sse,
+AS_HELP_STRING([--with-sse],[use SSE instructions (default is YES)]),
+GIT_PARSE_WITH(sse))
+
## Checks for programs.
AC_MSG_NOTICE([CHECKS for programs])
@@ -449,6 +454,7 @@ else
fi
fi
GIT_CONF_SUBST([TCLTK_PATH])
+GIT_CONF_SUBST([NO_SSE])
AC_CHECK_PROGS(ASCIIDOC, [asciidoc])
if test -n "$ASCIIDOC"; then
AC_MSG_CHECKING([for asciidoc version])
--git a/refs.c b/refs.c
index 28d5eca..8ca124c 100644
--- a/refs.c
+++ b/refs.c
@@ -5,6 +5,8 @@
#include "dir.h"
#include "string-list.h"
+#include <nmmintrin.h>
+
/*
* Make sure "ref" is something reasonable to have under ".git/refs/";
* We do not like it if:
@@ -29,30 +31,160 @@ static inline int bad_ref_char(int ch)
return 0;
}
+char refname_disposition[] = {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 2, 1,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 9, 0,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 9, 0, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 9, 9, 0, 0,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9
+};
+
/*
* Try to read one refname component from the front of refname. Return
* the length of the component found, or -1 if the component is not
* legal.
*/
-static int check_refname_component(const char *refname, int flags)
+static int check_refname_component_1(const char *refname, int flags)
{
const char *cp;
char last = '\0';
for (cp = refname; ; cp++) {
- char ch = *cp;
- if (ch == '\0' || ch == '/')
+ unsigned char ch = (unsigned char) *cp;
+ char disp = refname_disposition[ch];
+ switch(disp) {
+ case 0:
+ return -1;
+ case 1:
+ goto out;
+ case 2:
+ if (last == '.')
+ return -1;
break;
- if (bad_ref_char(ch))
- return -1; /* Illegal character in refname. */
+ case 3:
+ if (last == '@')
+ return -1;
+ break;
+ }
+
if (last == '.' && ch == '.')
return -1; /* Refname contains "..". */
if (last == '@' && ch == '{')
return -1; /* Refname contains "@{". */
last = ch;
}
+out:
+ if (cp == refname)
+ return 0; /* Component has zero length. */
+
+ if (refname[0] == '.') {
+ if (!(flags & REFNAME_DOT_COMPONENT))
+ return -1; /* Component starts with '.'. */
+ /*
+ * Even if leading dots are allowed, don't allow "."
+ * as a component (".." is prevented by a rule above).
+ */
+ if (refname[1] == '\0')
+ return -1; /* Component equals ".". */
+ }
+ if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
+ return -1; /* Refname ends with ".lock". */
+ return cp - refname;
+}
+
+#ifdef NO_SSE
+#define check_refname_component check_refname_component_1
+#else
+
+#define PAGE_SIZE 4096
+#define BLOCK_SIZE 16
+
+/* Vectorized version of check_refname_component */
+static int check_refname_component(const char *refname, int flags)
+{
+ const __m128i *refname_vec = (__m128i*) refname;
+
+ /* Character ranges for characters forbidden in refs; see
+ * above */
+ static const __v16qi bad = {
+ 0x01, 0x20, 0x7e, 0x7f, 0x5e, 0x5e, 0x3a, 0x3a,
+ 0x5b, 0x5c, 0x2a, 0x2a, 0x3f, 0x3f, 0x3f, 0x3f};
+
+ static const __v16qi nonslashes = {
+ '\001', '/' -1, '/' + 1, 0xff,
+ };
+
+ static const __v16qi dotdot = {'.','.',0};
+ static const __v16qi atcurly = {'@','{',0};
+
+ const __m128i *vp;
+ const char *cp = (const char *)refname_vec;
+
+
+ int dotdotpos = BLOCK_SIZE, atcurlypos = BLOCK_SIZE;
+ for (vp = refname_vec; ; vp++) {
+ __m128i tmp;
+ int endpos;
+
+ /* Handle case of forbidden substrings .. and @{ crossing
+ * sixteen-byte boudaries */
+ if (dotdotpos == 15 && *cp == '.')
+ return -1;
+
+ if (atcurlypos == 15 && *cp == '{')
+ return -1;
+
+ if (((uintptr_t) vp & (PAGE_SIZE - 1)) > PAGE_SIZE - BLOCK_SIZE)
+ /* End-of-page; fall back to slow method for
+ * this entire component. */
+ return check_refname_component_1(refname, flags);
+
+ tmp = _mm_lddqu_si128(vp);
+
+ /* Find slashes or end-of-string. The double-negative
+ * (negative-polarity search for non-slashes) is
+ * necessary so that \0 will also be counted. */
+ endpos = _mm_cmpistri((__m128i) nonslashes, tmp,
+ _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |
+ _SIDD_NEGATIVE_POLARITY);
+
+ if (_mm_cmpestrc((__m128i) bad, BLOCK_SIZE, tmp, endpos,
+ _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES))
+ return -1;
+
+ dotdotpos = _mm_cmpestri((__m128i) dotdot, 2, tmp, endpos,
+ _SIDD_UBYTE_OPS |
+ _SIDD_CMP_EQUAL_ORDERED);
+ if (dotdotpos < 15)
+ return -1;
+
+ atcurlypos = _mm_cmpestri((__m128i) atcurly, 2, tmp, endpos,
+ _SIDD_UBYTE_OPS |
+ _SIDD_CMP_EQUAL_ORDERED);
+ if (atcurlypos < 15)
+ return -1;
+
+ if (endpos < BLOCK_SIZE) {
+ cp = ((const char*) vp) + endpos;
+ break;
+ }
+ cp = (const char*) vp + BLOCK_SIZE;
+ }
+
if (cp == refname)
return 0; /* Component has zero length. */
+
if (refname[0] == '.') {
if (!(flags & REFNAME_DOT_COMPONENT))
return -1; /* Component starts with '.'. */
@@ -67,6 +199,7 @@ static int check_refname_component(const char *refname, int flags)
return -1; /* Refname ends with ".lock". */
return cp - refname;
}
+#endif
int check_refname_format(const char *refname, int flags)
{
--git a/t/t5511-refspec.sh b/t/t5511-refspec.sh
index c289322..0f03f9c 100755
--- a/t/t5511-refspec.sh
+++ b/t/t5511-refspec.sh
@@ -84,4 +84,17 @@ test_refspec push 'refs/heads/*/*/for-linus:refs/remotes/mine/*' invalid
test_refspec fetch 'refs/heads/*/for-linus:refs/remotes/mine/*'
test_refspec push 'refs/heads/*/for-linus:refs/remotes/mine/*'
+test_refspec fetch 'refs/heads/a-very-long-refname'
+test_refspec fetch 'refs/heads/.a-very-long-refname' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123..' invalid
+test_refspec fetch 'refs/heads/abcdefgh01234..' invalid
+test_refspec fetch 'refs/heads/abcdefgh012345..' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123456..' invalid
+test_refspec fetch 'refs/heads/abcdefgh01234567..' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123.a'
+test_refspec fetch 'refs/heads/abcdefgh01234.a'
+test_refspec fetch 'refs/heads/abcdefgh012345.a'
+test_refspec fetch 'refs/heads/abcdefgh0123456.a'
+test_refspec fetch 'refs/heads/abcdefgh01234567.a'
+
test_done
--
2.0.0.rc1.18.gf763c0f
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH] check_refname_component: Optimize
2014-05-28 21:04 [PATCH v2 0/1] " David Turner
@ 2014-05-28 21:04 ` David Turner
2014-05-28 21:44 ` Michael Haggerty
0 siblings, 1 reply; 17+ messages in thread
From: David Turner @ 2014-05-28 21:04 UTC (permalink / raw)
To: git, gitster, mhagger; +Cc: David Turner
In a repository with tens of thousands of refs, the command
~/git/git-diff-index --cached --quiet --ignore-submodules [revision]
is a bit slow. check_refname_component is a major contributor to the
runtime. Provide two optimized versions of this function: one for
machines with SSE4.2, and one for machines without. The speedup for
this command on one particular repo (with about 60k refs) is about 12%
for the SSE version and 8% for the non-SSE version.
Signed-off-by: David Turner <dturner@twitter.com>
---
| 6 +++
| 6 +++
| 152 +++++++++++++++++++++++++++++++++++++++++++++++++++--
| 13 +++++
4 files changed, 172 insertions(+), 5 deletions(-)
--git a/Makefile b/Makefile
index a53f3a8..123e2fc 100644
--- a/Makefile
+++ b/Makefile
@@ -1326,6 +1326,11 @@ else
COMPAT_OBJS += compat/win32mmap.o
endif
endif
+ifdef NO_SSE
+ BASIC_CFLAGS += -DNO_SSE
+else
+ BASIC_CFLAGS += -msse4
+endif
ifdef OBJECT_CREATION_USES_RENAMES
COMPAT_CFLAGS += -DOBJECT_CREATION_MODE=1
endif
@@ -2199,6 +2204,7 @@ GIT-BUILD-OPTIONS: FORCE
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@
@echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@
+ @echo NO_SSE=\''$(subst ','\'',$(subst ','\'',$(NO_SSE)))'\' >>$@
ifdef TEST_OUTPUT_DIRECTORY
@echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@
endif
--git a/configure.ac b/configure.ac
index b711254..71a9429 100644
--- a/configure.ac
+++ b/configure.ac
@@ -382,6 +382,11 @@ AS_HELP_STRING([],[Tcl/Tk interpreter will be found in a system.]),
GIT_PARSE_WITH(tcltk))
#
+# Declare the with-sse/without-sse options.
+AC_ARG_WITH(sse,
+AS_HELP_STRING([--with-sse],[use SSE instructions (default is YES)]),
+GIT_PARSE_WITH(sse))
+
## Checks for programs.
AC_MSG_NOTICE([CHECKS for programs])
@@ -449,6 +454,7 @@ else
fi
fi
GIT_CONF_SUBST([TCLTK_PATH])
+GIT_CONF_SUBST([NO_SSE])
AC_CHECK_PROGS(ASCIIDOC, [asciidoc])
if test -n "$ASCIIDOC"; then
AC_MSG_CHECKING([for asciidoc version])
--git a/refs.c b/refs.c
index 28d5eca..8f0de04 100644
--- a/refs.c
+++ b/refs.c
@@ -5,6 +5,8 @@
#include "dir.h"
#include "string-list.h"
+#include <nmmintrin.h>
+
/*
* Make sure "ref" is something reasonable to have under ".git/refs/";
* We do not like it if:
@@ -29,30 +31,169 @@ static inline int bad_ref_char(int ch)
return 0;
}
+char refname_disposition[] = {
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 2, 1,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 9, 0,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 9, 0, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 9, 9, 0, 0,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9
+};
+
/*
* Try to read one refname component from the front of refname. Return
* the length of the component found, or -1 if the component is not
* legal.
*/
-static int check_refname_component(const char *refname, int flags)
+static int check_refname_component_1(const char *refname, int flags)
{
const char *cp;
char last = '\0';
for (cp = refname; ; cp++) {
- char ch = *cp;
- if (ch == '\0' || ch == '/')
+ unsigned char ch = (unsigned char) *cp;
+ char disp = refname_disposition[ch];
+ switch(disp) {
+ case 0:
+ return -1;
+ case 1:
+ goto out;
+ case 2:
+ if (last == '.')
+ return -1;
break;
- if (bad_ref_char(ch))
- return -1; /* Illegal character in refname. */
+ case 3:
+ if (last == '@')
+ return -1;
+ break;
+ }
+
if (last == '.' && ch == '.')
return -1; /* Refname contains "..". */
if (last == '@' && ch == '{')
return -1; /* Refname contains "@{". */
last = ch;
}
+out:
+ if (cp == refname)
+ return 0; /* Component has zero length. */
+
+ if (refname[0] == '.') {
+ if (!(flags & REFNAME_DOT_COMPONENT))
+ return -1; /* Component starts with '.'. */
+ /*
+ * Even if leading dots are allowed, don't allow "."
+ * as a component (".." is prevented by a rule above).
+ */
+ if (refname[1] == '\0')
+ return -1; /* Component equals ".". */
+ }
+ if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
+ return -1; /* Refname ends with ".lock". */
+ return cp - refname;
+}
+
+#ifdef NO_SSE
+#define check_refname_component check_refname_component_1
+#else
+
+#ifndef _SIDD_UBYTE_OPS
+#define _SIDD_UBYTE_OPS 0x00
+#define _SIDD_CMP_EQUAL_ANY 0x00
+#define _SIDD_CMP_RANGES 0x04
+#define _SIDD_CMP_EQUAL_ORDERED 0x0c
+#define _SIDD_NEGATIVE_POLARITY 0x10
+#endif
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+#define BLOCK_SIZE 16
+
+/* Vectorized version of check_refname_component */
+static int check_refname_component(const char *refname, int flags)
+{
+ const __m128i *refname_vec = (__m128i*) refname;
+
+ /* Character ranges for characters forbidden in refs; see
+ * above */
+ static const __v16qi bad = {
+ 0x01, 0x20, 0x7e, 0x7f, 0x5e, 0x5e, 0x3a, 0x3a,
+ 0x5b, 0x5c, 0x2a, 0x2a, 0x3f, 0x3f, 0x3f, 0x3f};
+
+ static const __v16qi nonslashes = {
+ '\001', '/' -1, '/' + 1, 0xff,
+ };
+
+ static const __v16qi dotdot = {'.','.',0};
+ static const __v16qi atcurly = {'@','{',0};
+
+ const __m128i *vp;
+ const char *cp = (const char *)refname_vec;
+
+
+ int dotdotpos = BLOCK_SIZE, atcurlypos = BLOCK_SIZE;
+ for (vp = refname_vec; ; vp++) {
+ __m128i tmp;
+ int endpos;
+
+ /* Handle case of forbidden substrings .. and @{ crossing
+ * sixteen-byte boudaries */
+ if (dotdotpos == 15 && *cp == '.')
+ return -1;
+
+ if (atcurlypos == 15 && *cp == '{')
+ return -1;
+
+ if (((uintptr_t) vp & (PAGE_SIZE - 1)) > PAGE_SIZE - BLOCK_SIZE)
+ /* End-of-page; fall back to slow method for
+ * this entire component. */
+ return check_refname_component_1(refname, flags);
+
+ tmp = _mm_lddqu_si128(vp);
+
+ /* Find slashes or end-of-string. The double-negative
+ * (negative-polarity search for non-slashes) is
+ * necessary so that \0 will also be counted. */
+ endpos = _mm_cmpistri((__m128i) nonslashes, tmp,
+ _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |
+ _SIDD_NEGATIVE_POLARITY);
+
+ if (_mm_cmpestrc((__m128i) bad, BLOCK_SIZE, tmp, endpos,
+ _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES))
+ return -1;
+
+ dotdotpos = _mm_cmpestri((__m128i) dotdot, 2, tmp, endpos,
+ _SIDD_UBYTE_OPS |
+ _SIDD_CMP_EQUAL_ORDERED);
+ if (dotdotpos < 15)
+ return -1;
+
+ atcurlypos = _mm_cmpestri((__m128i) atcurly, 2, tmp, endpos,
+ _SIDD_UBYTE_OPS |
+ _SIDD_CMP_EQUAL_ORDERED);
+ if (atcurlypos < 15)
+ return -1;
+
+ if (endpos < BLOCK_SIZE) {
+ cp = ((const char*) vp) + endpos;
+ break;
+ }
+ cp = (const char*) vp + BLOCK_SIZE;
+ }
+
if (cp == refname)
return 0; /* Component has zero length. */
+
if (refname[0] == '.') {
if (!(flags & REFNAME_DOT_COMPONENT))
return -1; /* Component starts with '.'. */
@@ -67,6 +208,7 @@ static int check_refname_component(const char *refname, int flags)
return -1; /* Refname ends with ".lock". */
return cp - refname;
}
+#endif
int check_refname_format(const char *refname, int flags)
{
--git a/t/t5511-refspec.sh b/t/t5511-refspec.sh
index c289322..0f03f9c 100755
--- a/t/t5511-refspec.sh
+++ b/t/t5511-refspec.sh
@@ -84,4 +84,17 @@ test_refspec push 'refs/heads/*/*/for-linus:refs/remotes/mine/*' invalid
test_refspec fetch 'refs/heads/*/for-linus:refs/remotes/mine/*'
test_refspec push 'refs/heads/*/for-linus:refs/remotes/mine/*'
+test_refspec fetch 'refs/heads/a-very-long-refname'
+test_refspec fetch 'refs/heads/.a-very-long-refname' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123..' invalid
+test_refspec fetch 'refs/heads/abcdefgh01234..' invalid
+test_refspec fetch 'refs/heads/abcdefgh012345..' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123456..' invalid
+test_refspec fetch 'refs/heads/abcdefgh01234567..' invalid
+test_refspec fetch 'refs/heads/abcdefgh0123.a'
+test_refspec fetch 'refs/heads/abcdefgh01234.a'
+test_refspec fetch 'refs/heads/abcdefgh012345.a'
+test_refspec fetch 'refs/heads/abcdefgh0123456.a'
+test_refspec fetch 'refs/heads/abcdefgh01234567.a'
+
test_done
--
2.0.0.rc1.18.gf763c0f
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-28 19:57 [PATCH] check_refname_component: Optimize David Turner
@ 2014-05-28 21:24 ` Junio C Hamano
2014-05-29 12:19 ` brian m. carlson
1 sibling, 0 replies; 17+ messages in thread
From: Junio C Hamano @ 2014-05-28 21:24 UTC (permalink / raw)
To: David Turner; +Cc: git, mhagger, David Turner
David Turner <dturner@twopensource.com> writes:
> In a repository with tens of thousands of refs, the command
> ~/git/git-diff-index --cached --quiet --ignore-submodules [revision]
> is a bit slow. check_refname_component is a major contributor to the
> runtime. Provide two optimized versions of this function: one for
> machines with SSE4.2, and one for machines without. The speedup for
> this command on one particular repo (with about 60k refs) is about 12%
> for the SSE version and 8% for the non-SSE version.
>
> Signed-off-by: David Turner <dturner@twitter.com>
Just a few quick impressions (I do not have time today to look at
new patches).
- The title seems a bit strange.
Perhaps "refs.c: optimize check_refname_component()" or something?
- "~/git/git-diff-index" looks doubly strange in that the issue you
are addressing would not depend on your compiled Git being
installed in your $HOME/git at all. For that matter, from the
command line and the patch, I am not sure if this is specific to
"git diff-index", or the same issue exists for anything that
takes revs and pathspecs from the command line.
> ---
> Makefile | 6 +++
> configure.ac | 6 +++
> refs.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++--
> t/t5511-refspec.sh | 13 +++++
> 4 files changed, 163 insertions(+), 5 deletions(-)
>
> diff --git a/Makefile b/Makefile
> index a53f3a8..123e2fc 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1326,6 +1326,11 @@ else
> COMPAT_OBJS += compat/win32mmap.o
> endif
> endif
> +ifdef NO_SSE
> + BASIC_CFLAGS += -DNO_SSE
> +else
> + BASIC_CFLAGS += -msse4
> +endif
> ifdef OBJECT_CREATION_USES_RENAMES
> COMPAT_CFLAGS += -DOBJECT_CREATION_MODE=1
> endif
> @@ -2199,6 +2204,7 @@ GIT-BUILD-OPTIONS: FORCE
> @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
> @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@
> @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@
> + @echo NO_SSE=\''$(subst ','\'',$(subst ','\'',$(NO_SSE)))'\' >>$@
> ifdef TEST_OUTPUT_DIRECTORY
> @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@
> endif
> diff --git a/configure.ac b/configure.ac
> index b711254..71a9429 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -382,6 +382,11 @@ AS_HELP_STRING([],[Tcl/Tk interpreter will be found in a system.]),
> GIT_PARSE_WITH(tcltk))
> #
>
> +# Declare the with-sse/without-sse options.
> +AC_ARG_WITH(sse,
> +AS_HELP_STRING([--with-sse],[use SSE instructions (default is YES)]),
> +GIT_PARSE_WITH(sse))
> +
>
> ## Checks for programs.
> AC_MSG_NOTICE([CHECKS for programs])
> @@ -449,6 +454,7 @@ else
> fi
> fi
> GIT_CONF_SUBST([TCLTK_PATH])
> +GIT_CONF_SUBST([NO_SSE])
> AC_CHECK_PROGS(ASCIIDOC, [asciidoc])
> if test -n "$ASCIIDOC"; then
> AC_MSG_CHECKING([for asciidoc version])
> diff --git a/refs.c b/refs.c
> index 28d5eca..8ca124c 100644
> --- a/refs.c
> +++ b/refs.c
> @@ -5,6 +5,8 @@
> #include "dir.h"
> #include "string-list.h"
>
> +#include <nmmintrin.h>
> +
We would prefer not to add inclusion of any system header files in
random *.c files, as there often are system dependencies (order of
inclusion, definition of feature macros, etc.) we would rather want
to encapsulate in one place, that is git-compat-util.h.
> /*
> * Make sure "ref" is something reasonable to have under ".git/refs/";
> * We do not like it if:
> @@ -29,30 +31,160 @@ static inline int bad_ref_char(int ch)
> return 0;
> }
>
> +char refname_disposition[] = {
> + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> ...
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9
> +};
> +
Does this array need to be extern?
Is this table designed to take both positive and negative values?
If the answer is "I expect we add only positive", then please make
it explicitly "unsigned char".
What do these magic numbers in the array mean?
How were the values derived? What are you doing in this commit to
help others who later need to debug, fix and enhance this table?
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-28 21:04 ` [PATCH] " David Turner
@ 2014-05-28 21:44 ` Michael Haggerty
2014-05-28 23:49 ` David Turner
0 siblings, 1 reply; 17+ messages in thread
From: Michael Haggerty @ 2014-05-28 21:44 UTC (permalink / raw)
To: David Turner, git, gitster; +Cc: David Turner
On 05/28/2014 11:04 PM, David Turner wrote:
> In a repository with tens of thousands of refs, the command
> ~/git/git-diff-index --cached --quiet --ignore-submodules [revision]
> is a bit slow. check_refname_component is a major contributor to the
> runtime. Provide two optimized versions of this function: one for
> machines with SSE4.2, and one for machines without. The speedup for
> this command on one particular repo (with about 60k refs) is about 12%
> for the SSE version and 8% for the non-SSE version.
Interesting. Thanks for the patch.
Why do you use "git diff-index" to benchmark your change? Is there
something particular about that command that leads to especially bad
performance with lots of references?
I assume that most of the time spent in check_refname_component() is
while reading the packed-refs file, right? If so, there are probably
other git commands that would better measure that time, with less other
work. For example, "git rev-parse refname" will read the packed-refs
file, too (assuming that "refname" is not a loose reference). If you
test the speedup of a cheaper command like that, it seems to me that you
will get a better idea of how much you are speeding up the ref-reading
part. It would also be interesting to see the difference in
milliseconds (rather than a percentage) for some specified number of
references.
I think it's worth using your LUT-based approach to save 8%. I'm less
sure it's worth the complication and unreadability of using SSE to get
the next 4% speedup. But the gains might be proven to be more
significant if you benchmark them differently.
I won't critique the code in detail until we have thrashed out the
metaissues, but I made a few comments below about nits that I noticed
when I scanned through.
> Signed-off-by: David Turner <dturner@twitter.com>
> ---
> Makefile | 6 +++
> configure.ac | 6 +++
> refs.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++--
> t/t5511-refspec.sh | 13 +++++
> 4 files changed, 172 insertions(+), 5 deletions(-)
>
> diff --git a/Makefile b/Makefile
> index a53f3a8..123e2fc 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -1326,6 +1326,11 @@ else
> COMPAT_OBJS += compat/win32mmap.o
> endif
> endif
> +ifdef NO_SSE
> + BASIC_CFLAGS += -DNO_SSE
> +else
> + BASIC_CFLAGS += -msse4
> +endif
> ifdef OBJECT_CREATION_USES_RENAMES
> COMPAT_CFLAGS += -DOBJECT_CREATION_MODE=1
> endif
> @@ -2199,6 +2204,7 @@ GIT-BUILD-OPTIONS: FORCE
> @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
> @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@
> @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@
> + @echo NO_SSE=\''$(subst ','\'',$(subst ','\'',$(NO_SSE)))'\' >>$@
> ifdef TEST_OUTPUT_DIRECTORY
> @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@
> endif
> diff --git a/configure.ac b/configure.ac
> index b711254..71a9429 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -382,6 +382,11 @@ AS_HELP_STRING([],[Tcl/Tk interpreter will be found in a system.]),
> GIT_PARSE_WITH(tcltk))
> #
>
> +# Declare the with-sse/without-sse options.
> +AC_ARG_WITH(sse,
> +AS_HELP_STRING([--with-sse],[use SSE instructions (default is YES)]),
> +GIT_PARSE_WITH(sse))
> +
>
> ## Checks for programs.
> AC_MSG_NOTICE([CHECKS for programs])
> @@ -449,6 +454,7 @@ else
> fi
> fi
> GIT_CONF_SUBST([TCLTK_PATH])
> +GIT_CONF_SUBST([NO_SSE])
> AC_CHECK_PROGS(ASCIIDOC, [asciidoc])
> if test -n "$ASCIIDOC"; then
> AC_MSG_CHECKING([for asciidoc version])
> diff --git a/refs.c b/refs.c
> index 28d5eca..8f0de04 100644
> --- a/refs.c
> +++ b/refs.c
> @@ -5,6 +5,8 @@
> #include "dir.h"
> #include "string-list.h"
>
> +#include <nmmintrin.h>
> +
You include this file unconditionally, but I doubt that it is present on
all platforms. I guess it has to be protected with #ifdef or something
at a minimum.
> /*
> * Make sure "ref" is something reasonable to have under ".git/refs/";
> * We do not like it if:
> @@ -29,30 +31,169 @@ static inline int bad_ref_char(int ch)
> return 0;
> }
>
Please add a comment here about what the values in refname_disposition
signify. And maybe add some line comments explaining unusual values,
for people who haven't memorized the ASCII encoding; e.g., on the third
line,
/* SP -> 0, '.' -> 2, '-' -> 1 */
Also, this variable could be static. And if you change your encoding to
use 0 instead of 9 for valid characters, then you could define the table
with an explicit length of 256 and omit the second half of the
initializers, letting those values be initialized to zero automatically.
> +char refname_disposition[] = {
> + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 2, 1,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 9, 9, 9, 9, 0,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 0, 9, 0, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 9, 9, 0, 0,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
> + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9
> +};
> +
> /*
> * Try to read one refname component from the front of refname. Return
> * the length of the component found, or -1 if the component is not
> * legal.
> */
> -static int check_refname_component(const char *refname, int flags)
> +static int check_refname_component_1(const char *refname, int flags)
> {
> const char *cp;
> char last = '\0';
>
> for (cp = refname; ; cp++) {
> - char ch = *cp;
> - if (ch == '\0' || ch == '/')
> + unsigned char ch = (unsigned char) *cp;
> + char disp = refname_disposition[ch];
> + switch(disp) {
> + case 0:
> + return -1;
> + case 1:
> + goto out;
> + case 2:
> + if (last == '.')
> + return -1;
> break;
> - if (bad_ref_char(ch))
> - return -1; /* Illegal character in refname. */
> + case 3:
> + if (last == '@')
> + return -1;
> + break;
> + }
You seem to have some indentation problems. Please always indent with
TAB characters.
> +
> if (last == '.' && ch == '.')
> return -1; /* Refname contains "..". */
> if (last == '@' && ch == '{')
> return -1; /* Refname contains "@{". */
> last = ch;
> }
> +out:
> + if (cp == refname)
> + return 0; /* Component has zero length. */
> +
> + if (refname[0] == '.') {
> + if (!(flags & REFNAME_DOT_COMPONENT))
> + return -1; /* Component starts with '.'. */
> + /*
> + * Even if leading dots are allowed, don't allow "."
> + * as a component (".." is prevented by a rule above).
> + */
> + if (refname[1] == '\0')
> + return -1; /* Component equals ".". */
> + }
> + if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5))
> + return -1; /* Refname ends with ".lock". */
> + return cp - refname;
> +}
> +
> +#ifdef NO_SSE
> +#define check_refname_component check_refname_component_1
> +#else
> +
> +#ifndef _SIDD_UBYTE_OPS
> +#define _SIDD_UBYTE_OPS 0x00
> +#define _SIDD_CMP_EQUAL_ANY 0x00
> +#define _SIDD_CMP_RANGES 0x04
> +#define _SIDD_CMP_EQUAL_ORDERED 0x0c
> +#define _SIDD_NEGATIVE_POLARITY 0x10
> +#endif
> +#ifndef PAGE_SIZE
> +#define PAGE_SIZE 4096
> +#endif
> +#define BLOCK_SIZE 16
> +
> +/* Vectorized version of check_refname_component */
> +static int check_refname_component(const char *refname, int flags)
> +{
> + const __m128i *refname_vec = (__m128i*) refname;
> +
> + /* Character ranges for characters forbidden in refs; see
> + * above */
> + static const __v16qi bad = {
> + 0x01, 0x20, 0x7e, 0x7f, 0x5e, 0x5e, 0x3a, 0x3a,
> + 0x5b, 0x5c, 0x2a, 0x2a, 0x3f, 0x3f, 0x3f, 0x3f};
> +
> + static const __v16qi nonslashes = {
> + '\001', '/' -1, '/' + 1, 0xff,
> + };
> +
> + static const __v16qi dotdot = {'.','.',0};
> + static const __v16qi atcurly = {'@','{',0};
> +
> + const __m128i *vp;
> + const char *cp = (const char *)refname_vec;
> +
> +
> + int dotdotpos = BLOCK_SIZE, atcurlypos = BLOCK_SIZE;
> + for (vp = refname_vec; ; vp++) {
> + __m128i tmp;
> + int endpos;
> +
> + /* Handle case of forbidden substrings .. and @{ crossing
> + * sixteen-byte boudaries */
> + if (dotdotpos == 15 && *cp == '.')
> + return -1;
> +
> + if (atcurlypos == 15 && *cp == '{')
> + return -1;
> +
> + if (((uintptr_t) vp & (PAGE_SIZE - 1)) > PAGE_SIZE - BLOCK_SIZE)
> + /* End-of-page; fall back to slow method for
> + * this entire component. */
> + return check_refname_component_1(refname, flags);
> +
> + tmp = _mm_lddqu_si128(vp);
> +
> + /* Find slashes or end-of-string. The double-negative
> + * (negative-polarity search for non-slashes) is
> + * necessary so that \0 will also be counted. */
> + endpos = _mm_cmpistri((__m128i) nonslashes, tmp,
> + _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |
> + _SIDD_NEGATIVE_POLARITY);
> +
> + if (_mm_cmpestrc((__m128i) bad, BLOCK_SIZE, tmp, endpos,
> + _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES))
> + return -1;
> +
> + dotdotpos = _mm_cmpestri((__m128i) dotdot, 2, tmp, endpos,
> + _SIDD_UBYTE_OPS |
> + _SIDD_CMP_EQUAL_ORDERED);
> + if (dotdotpos < 15)
> + return -1;
> +
> + atcurlypos = _mm_cmpestri((__m128i) atcurly, 2, tmp, endpos,
> + _SIDD_UBYTE_OPS |
> + _SIDD_CMP_EQUAL_ORDERED);
> + if (atcurlypos < 15)
> + return -1;
> +
> + if (endpos < BLOCK_SIZE) {
> + cp = ((const char*) vp) + endpos;
> + break;
> + }
> + cp = (const char*) vp + BLOCK_SIZE;
> + }
> +
> if (cp == refname)
> return 0; /* Component has zero length. */
> +
> if (refname[0] == '.') {
> if (!(flags & REFNAME_DOT_COMPONENT))
> return -1; /* Component starts with '.'. */
> @@ -67,6 +208,7 @@ static int check_refname_component(const char *refname, int flags)
> return -1; /* Refname ends with ".lock". */
> return cp - refname;
> }
> +#endif
>
> int check_refname_format(const char *refname, int flags)
> {
> diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh
> index c289322..0f03f9c 100755
> --- a/t/t5511-refspec.sh
> +++ b/t/t5511-refspec.sh
> @@ -84,4 +84,17 @@ test_refspec push 'refs/heads/*/*/for-linus:refs/remotes/mine/*' invalid
> test_refspec fetch 'refs/heads/*/for-linus:refs/remotes/mine/*'
> test_refspec push 'refs/heads/*/for-linus:refs/remotes/mine/*'
>
> +test_refspec fetch 'refs/heads/a-very-long-refname'
> +test_refspec fetch 'refs/heads/.a-very-long-refname' invalid
> +test_refspec fetch 'refs/heads/abcdefgh0123..' invalid
> +test_refspec fetch 'refs/heads/abcdefgh01234..' invalid
> +test_refspec fetch 'refs/heads/abcdefgh012345..' invalid
> +test_refspec fetch 'refs/heads/abcdefgh0123456..' invalid
> +test_refspec fetch 'refs/heads/abcdefgh01234567..' invalid
> +test_refspec fetch 'refs/heads/abcdefgh0123.a'
> +test_refspec fetch 'refs/heads/abcdefgh01234.a'
> +test_refspec fetch 'refs/heads/abcdefgh012345.a'
> +test_refspec fetch 'refs/heads/abcdefgh0123456.a'
> +test_refspec fetch 'refs/heads/abcdefgh01234567.a'
> +
> test_done
>
Please mention in your commit message why you added these tests. (Are
they to test peculiarities around 16-byte boundaries?)
Michael
--
Michael Haggerty
mhagger@alum.mit.edu
http://softwareswirl.blogspot.com/
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-28 21:44 ` Michael Haggerty
@ 2014-05-28 23:49 ` David Turner
2014-05-29 13:41 ` Duy Nguyen
0 siblings, 1 reply; 17+ messages in thread
From: David Turner @ 2014-05-28 23:49 UTC (permalink / raw)
To: Michael Haggerty; +Cc: git, gitster, David Turner
On Wed, 2014-05-28 at 23:44 +0200, Michael Haggerty wrote:
> On 05/28/2014 11:04 PM, David Turner wrote:
> > In a repository with tens of thousands of refs, the command
> > ~/git/git-diff-index --cached --quiet --ignore-submodules [revision]
> > is a bit slow. check_refname_component is a major contributor to the
> > runtime. Provide two optimized versions of this function: one for
> > machines with SSE4.2, and one for machines without. The speedup for
> > this command on one particular repo (with about 60k refs) is about 12%
> > for the SSE version and 8% for the non-SSE version.
>
> Interesting. Thanks for the patch.
Thanks for your thoughtful comments.
> Why do you use "git diff-index" to benchmark your change? Is there
> something particular about that command that leads to especially bad
> performance with lots of references?
Because a colleague specifically asked me to look at it because he uses
it as part of the zsh/bash git prompt dirty bit display. But that is not
actually a good reason to use it in the commit-message. This is also
the reason why milliseconds matter, although at present other parts of
git are slow enough that the prompt stuff is still somewhat infeasible
for large repos.
> I assume that most of the time spent in check_refname_component() is
> while reading the packed-refs file, right?
Yes.
> If so, there are probably
> other git commands that would better measure that time, with less other
> work. For example, "git rev-parse refname" will read the packed-refs
> file, too (assuming that "refname" is not a loose reference). If you
> test the speedup of a cheaper command like that, it seems to me that you
> will get a better idea of how much you are speeding up the ref-reading
> part. It would also be interesting to see the difference in
> milliseconds (rather than a percentage) for some specified number of
> references.
I'll change it to rev-parse and to show the difference in milliseconds.
The times on rev-parse are 35/29/25ms on one particular computer, for
original, LUT, SSE. So, somewhat larger speedup in percentage terms. I
should also note that this represents a real use-case, and I expect that
the number of refs will be somewhat larger in the future.
> I think it's worth using your LUT-based approach to save 8%. I'm less
> sure it's worth the complication and unreadability of using SSE to get
> the next 4% speedup. But the gains might be proven to be more
> significant if you benchmark them differently.
I was actually hoping at some point to use SSE to speed up a small few
of the other slow bits e.g. get_sha1_hex. I have not yet tested if this
will actually be faster, but I bet it will. And my watchman branch uses
it to speed up the hashmap (but it seems CityHash works about as well so
I might just port that instead).
But actually speaking of SSE: is there a minimum compiler version for
git? Because I have just discovered gcc-4.2 on the Mac has a bug which
causes this code to misbehave. Yet again, compiler intrinsics prove
less portable than straight assembly language. I would be just as happy
to write it in assembly, but I suspect that nobody would enjoy that. I
could also work around the bug with a compiler-specific ifdef, but Apple
has been shipping clang for some years now, so I think it's OK to leave
the code as-is.
> I won't critique the code in detail until we have thrashed out the
> metaissues, but I made a few comments below about nits that I noticed
> when I scanned through.
I'll go ahead and roll a new version with the nits picked.
> Please add a comment here about what the values in refname_disposition
> signify. And maybe add some line comments explaining unusual values,
> for people who haven't memorized the ASCII encoding; e.g., on the third
> line,
I think I'm going to say, "see below for the list of illegal characters,
from which this table is derived.", if that's OK with you.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-28 19:57 [PATCH] check_refname_component: Optimize David Turner
2014-05-28 21:24 ` Junio C Hamano
@ 2014-05-29 12:19 ` brian m. carlson
1 sibling, 0 replies; 17+ messages in thread
From: brian m. carlson @ 2014-05-29 12:19 UTC (permalink / raw)
To: David Turner; +Cc: git, mhagger, gitster, David Turner
[-- Attachment #1: Type: text/plain, Size: 831 bytes --]
On Wed, May 28, 2014 at 03:57:35PM -0400, David Turner wrote:
> +ifdef NO_SSE
> + BASIC_CFLAGS += -DNO_SSE
This is not a great name. This implies the system does not have SSE
(SSE 1), but in fact what you're concerned about is SSE 4. This will be
confusing for users of older x86-64 processors, where SSE and SSE 2 are
architecturally guaranteed even though SSE 4 is not present.
> +else
> + BASIC_CFLAGS += -msse4
> +endif
You probably also want to autodetect when the system is not x86(-64)?
and turn this off. I doubt PowerPC/SPARC/ARM users are going to want to
have to disable it manually.
--
brian m. carlson / brian with sandals: Houston, Texas, US
+1 832 623 2791 | http://www.crustytoothpaste.net/~bmc | My opinion only
OpenPGP: RSA v4 4096b: 88AC E9B2 9196 305B A994 7552 F1BA 225C 0223 B187
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-28 23:49 ` David Turner
@ 2014-05-29 13:41 ` Duy Nguyen
2014-05-29 16:36 ` Junio C Hamano
0 siblings, 1 reply; 17+ messages in thread
From: Duy Nguyen @ 2014-05-29 13:41 UTC (permalink / raw)
To: David Turner
Cc: Michael Haggerty, Git Mailing List, Junio C Hamano, David Turner
On Thu, May 29, 2014 at 6:49 AM, David Turner <dturner@twopensource.com> wrote:
>> I assume that most of the time spent in check_refname_component() is
>> while reading the packed-refs file, right?
>
> Yes.
I wonder if we can get away without SSE code by saving stat info of
the packed-refs version that we have verified. When we read pack-refs,
if stat info matches, skip check_refname_component(). Assuming that
pack-refs does not change often, of course.
--
Duy
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-29 13:41 ` Duy Nguyen
@ 2014-05-29 16:36 ` Junio C Hamano
2014-05-29 23:24 ` Duy Nguyen
0 siblings, 1 reply; 17+ messages in thread
From: Junio C Hamano @ 2014-05-29 16:36 UTC (permalink / raw)
To: Duy Nguyen; +Cc: David Turner, Michael Haggerty, Git Mailing List, David Turner
Duy Nguyen <pclouds@gmail.com> writes:
> On Thu, May 29, 2014 at 6:49 AM, David Turner <dturner@twopensource.com> wrote:
>>> I assume that most of the time spent in check_refname_component() is
>>> while reading the packed-refs file, right?
>>
>> Yes.
>
> I wonder if we can get away without SSE code by saving stat info of
> the packed-refs version that we have verified. When we read pack-refs,
> if stat info matches, skip check_refname_component(). Assuming that
> pack-refs does not change often, of course.
Can you elaborate a bit more?
Regardless, I think I would prefer to see this patch done as at
least a two step series, one that does only the look-up table thing,
and then the other with arch-specific tweaks as a follow-up on top.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-29 16:36 ` Junio C Hamano
@ 2014-05-29 23:24 ` Duy Nguyen
2014-05-29 23:41 ` Jeff King
0 siblings, 1 reply; 17+ messages in thread
From: Duy Nguyen @ 2014-05-29 23:24 UTC (permalink / raw)
To: Junio C Hamano
Cc: David Turner, Michael Haggerty, Git Mailing List, David Turner
On Thu, May 29, 2014 at 11:36 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Duy Nguyen <pclouds@gmail.com> writes:
>
>> On Thu, May 29, 2014 at 6:49 AM, David Turner <dturner@twopensource.com> wrote:
>>>> I assume that most of the time spent in check_refname_component() is
>>>> while reading the packed-refs file, right?
>>>
>>> Yes.
>>
>> I wonder if we can get away without SSE code by saving stat info of
>> the packed-refs version that we have verified. When we read pack-refs,
>> if stat info matches, skip check_refname_component(). Assuming that
>> pack-refs does not change often, of course.
>
> Can you elaborate a bit more?
The first time we read packed_refs, check_refname_format() is called
in read_packed_refs()->create_ref_entry() as usual. If we find no
problem, we store packed_refs stat() info in maybe packed_refs.stat.
Next time we read packed_refs, if packed_refs.stat is there and
indicates that packed_refs has not changed, we can make
create_ref_entry() ignore check_refname_format() completely. That's
even cheaper than SSE-enhanced check_refname_component() and easier to
do. If packed_refs is updated, we do the whole check_refname_format
dance again.
--
Duy
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-29 23:24 ` Duy Nguyen
@ 2014-05-29 23:41 ` Jeff King
2014-05-29 23:43 ` Duy Nguyen
0 siblings, 1 reply; 17+ messages in thread
From: Jeff King @ 2014-05-29 23:41 UTC (permalink / raw)
To: Duy Nguyen
Cc: Junio C Hamano, David Turner, Michael Haggerty, Git Mailing List,
David Turner
On Fri, May 30, 2014 at 06:24:30AM +0700, Duy Nguyen wrote:
> >> I wonder if we can get away without SSE code by saving stat info of
> >> the packed-refs version that we have verified. When we read pack-refs,
> >> if stat info matches, skip check_refname_component(). Assuming that
> >> pack-refs does not change often, of course.
> >
> > Can you elaborate a bit more?
>
> The first time we read packed_refs, check_refname_format() is called
> in read_packed_refs()->create_ref_entry() as usual. If we find no
> problem, we store packed_refs stat() info in maybe packed_refs.stat.
> Next time we read packed_refs, if packed_refs.stat is there and
> indicates that packed_refs has not changed, we can make
> create_ref_entry() ignore check_refname_format() completely.
I'm confused. Why would we re-open packed-refs at all if the stat
information hasn't changed?
read_packed_refs is only called from get_packed_ref_cache, and we only
do so if !refs->packed. And refs->packed is only NULL if we haven't read
the file yet, or it is stat-dirty.
If that is working as intended, then we should generally only open and
read packed-refs once per invocation of git.
-Peff
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-29 23:41 ` Jeff King
@ 2014-05-29 23:43 ` Duy Nguyen
2014-05-30 0:07 ` Jeff King
0 siblings, 1 reply; 17+ messages in thread
From: Duy Nguyen @ 2014-05-29 23:43 UTC (permalink / raw)
To: Jeff King
Cc: Junio C Hamano, David Turner, Michael Haggerty, Git Mailing List,
David Turner
On Fri, May 30, 2014 at 6:41 AM, Jeff King <peff@peff.net> wrote:
> On Fri, May 30, 2014 at 06:24:30AM +0700, Duy Nguyen wrote:
>
>> >> I wonder if we can get away without SSE code by saving stat info of
>> >> the packed-refs version that we have verified. When we read pack-refs,
>> >> if stat info matches, skip check_refname_component(). Assuming that
>> >> pack-refs does not change often, of course.
>> >
>> > Can you elaborate a bit more?
>>
>> The first time we read packed_refs, check_refname_format() is called
>> in read_packed_refs()->create_ref_entry() as usual. If we find no
>> problem, we store packed_refs stat() info in maybe packed_refs.stat.
>> Next time we read packed_refs, if packed_refs.stat is there and
>> indicates that packed_refs has not changed, we can make
>> create_ref_entry() ignore check_refname_format() completely.
>
> I'm confused. Why would we re-open packed-refs at all if the stat
> information hasn't changed?
No, not in the same process. In the next run.
>
> read_packed_refs is only called from get_packed_ref_cache, and we only
> do so if !refs->packed. And refs->packed is only NULL if we haven't read
> the file yet, or it is stat-dirty.
>
> If that is working as intended, then we should generally only open and
> read packed-refs once per invocation of git.
>
> -Peff
--
Duy
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-29 23:43 ` Duy Nguyen
@ 2014-05-30 0:07 ` Jeff King
2014-05-30 2:03 ` Duy Nguyen
` (2 more replies)
0 siblings, 3 replies; 17+ messages in thread
From: Jeff King @ 2014-05-30 0:07 UTC (permalink / raw)
To: Duy Nguyen
Cc: Junio C Hamano, David Turner, Michael Haggerty, Git Mailing List,
David Turner
On Fri, May 30, 2014 at 06:43:18AM +0700, Duy Nguyen wrote:
> >> The first time we read packed_refs, check_refname_format() is called
> >> in read_packed_refs()->create_ref_entry() as usual. If we find no
> >> problem, we store packed_refs stat() info in maybe packed_refs.stat.
> >> Next time we read packed_refs, if packed_refs.stat is there and
> >> indicates that packed_refs has not changed, we can make
> >> create_ref_entry() ignore check_refname_format() completely.
> >
> > I'm confused. Why would we re-open packed-refs at all if the stat
> > information hasn't changed?
>
> No, not in the same process. In the next run.
Ah, I thought "packed_refs.stat" was a struct member, but I guess you
mean it as a filename.
But then we're just trusting that the "trust me" flag on disk is
correct. Why not just trust that packed-refs is correct in the first
place?
IOW, consider this progression of changes:
1. Check refname format when we read packed-refs (the current
behavior).
2. Keep a separate file "packed-refs.stat" with stat information. If
the packed-refs file matches that stat information, do not bother
checking refname formats.
3. Put a flag in "packed-refs" that says "trust me, I'm valid". Check
the refnames when it is generated.
4. Realize that we already check the refnames when we write it out.
Don't bother writing "trust me, I'm valid"; readers can assume that
it is.
What is the scenario that option (2) protects against that options (3)
and (4) do not?
I could guess something like "the writer has a different idea of what a
valid refname is than we do". But that applies as well to (2), but just
as "the reader who wrote packed-refs.stat has a different idea than we
do".
As a side note, while it is nice that we might make check_refname_format
faster, I think if you _really_ want to make repos with a lot of refs
faster, it would make more sense to introduce an on-disk format that
does not need linear parsing (e.g., something we could mmap and binary
search, or even something dbm-ish that could be updated without
rewriting the whole file (deletions, for example, must rewrite the
whole file, giving quadratic performance when deleting all refs one by
one).
-Peff
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-30 0:07 ` Jeff King
@ 2014-05-30 2:03 ` Duy Nguyen
2014-05-30 9:47 ` Michael Haggerty
2014-05-31 11:21 ` Duy Nguyen
2 siblings, 0 replies; 17+ messages in thread
From: Duy Nguyen @ 2014-05-30 2:03 UTC (permalink / raw)
To: Jeff King
Cc: Junio C Hamano, David Turner, Michael Haggerty, Git Mailing List,
David Turner
On Fri, May 30, 2014 at 7:07 AM, Jeff King <peff@peff.net> wrote:
> But then we're just trusting that the "trust me" flag on disk is
> correct. Why not just trust that packed-refs is correct in the first
> place?
>
> IOW, consider this progression of changes:
>
> 1. Check refname format when we read packed-refs (the current
> behavior).
>
> 2. Keep a separate file "packed-refs.stat" with stat information. If
> the packed-refs file matches that stat information, do not bother
> checking refname formats.
>
> 3. Put a flag in "packed-refs" that says "trust me, I'm valid". Check
> the refnames when it is generated.
>
> 4. Realize that we already check the refnames when we write it out.
> Don't bother writing "trust me, I'm valid"; readers can assume that
> it is.
>
> What is the scenario that option (2) protects against that options (3)
> and (4) do not?
>
> I could guess something like "the writer has a different idea of what a
> valid refname is than we do". But that applies as well to (2), but just
> as "the reader who wrote packed-refs.stat has a different idea than we
> do".
The reader and the writer have to agree on the same "valid" definition
or it wouldn't work. I don't suppose this packed-refs.stat idea would
spread out to other implementations than C git, so we're still good.
If we could write a flag in packed-refs saying "trust me" and other
implementations will strip it when they update packed-refs, then we're
good too.
>
> As a side note, while it is nice that we might make check_refname_format
> faster, I think if you _really_ want to make repos with a lot of refs
> faster, it would make more sense to introduce an on-disk format that
> does not need linear parsing (e.g., something we could mmap and binary
> search, or even something dbm-ish that could be updated without
> rewriting the whole file (deletions, for example, must rewrite the
> whole file, giving quadratic performance when deleting all refs one by
> one).
Yeah, I bring up the idea because I think Mike's multiple ref backends
is the way to go (assuming that it won't take as long as pack v4
development). If we assume we'll go with that, then we can keep the
workaround to minimum.
--
Duy
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-30 0:07 ` Jeff King
2014-05-30 2:03 ` Duy Nguyen
@ 2014-05-30 9:47 ` Michael Haggerty
2014-05-30 17:29 ` Jeff King
2014-05-31 11:21 ` Duy Nguyen
2 siblings, 1 reply; 17+ messages in thread
From: Michael Haggerty @ 2014-05-30 9:47 UTC (permalink / raw)
To: Jeff King, Duy Nguyen
Cc: Junio C Hamano, David Turner, Git Mailing List, David Turner
On 05/30/2014 02:07 AM, Jeff King wrote:
> On Fri, May 30, 2014 at 06:43:18AM +0700, Duy Nguyen wrote:
>
>>>> The first time we read packed_refs, check_refname_format() is called
>>>> in read_packed_refs()->create_ref_entry() as usual. If we find no
>>>> problem, we store packed_refs stat() info in maybe packed_refs.stat.
>>>> Next time we read packed_refs, if packed_refs.stat is there and
>>>> indicates that packed_refs has not changed, we can make
>>>> create_ref_entry() ignore check_refname_format() completely.
>>>
>>> I'm confused. Why would we re-open packed-refs at all if the stat
>>> information hasn't changed?
>>
>> No, not in the same process. In the next run.
>
> Ah, I thought "packed_refs.stat" was a struct member, but I guess you
> mean it as a filename.
>
> But then we're just trusting that the "trust me" flag on disk is
> correct. Why not just trust that packed-refs is correct in the first
> place?
>
> IOW, consider this progression of changes:
>
> 1. Check refname format when we read packed-refs (the current
> behavior).
>
> 2. Keep a separate file "packed-refs.stat" with stat information. If
> the packed-refs file matches that stat information, do not bother
> checking refname formats.
>
> 3. Put a flag in "packed-refs" that says "trust me, I'm valid". Check
> the refnames when it is generated.
>
> 4. Realize that we already check the refnames when we write it out.
> Don't bother writing "trust me, I'm valid"; readers can assume that
> it is.
>
> What is the scenario that option (2) protects against that options (3)
> and (4) do not?
>
> I could guess something like "the writer has a different idea of what a
> valid refname is than we do". But that applies as well to (2), but just
> as "the reader who wrote packed-refs.stat has a different idea than we
> do".
If we want to be robust to future changes to refname rules, we could add
a header flag like
# pack-refs with: peeled fully-peeled check-level=1.0
which promises that the reference names in the file conform to the
current ("version 1.0") check_refname_format() rules.
If we ever make the rules stricter (a "backwards-compatible" change), we
would increment the check-level to 1.1. That way, an old reader, who
knows about check-level=1.0 but not check-level=1.1, can still trust
that the refnames in the file conform to its check_refname_format()
rules and avoid the verification step. (Of course if that version
writes the file again, it would have to set the check-level=1.0 tag, and
newer software would be forced to validate on reading to be sure that
the refnames still conform to check-level=1.1.)
If we make the rules looser (a "backwards-incompatible" change), we
would increment the check-level to 2.0. In that case readers who only
know about check-level 1.x would have to turn their verification code
back on when reading the file to ensure that they can work with the
refnames that it contains.
Format changes should be infrequent enough, and the cost of verification
is low enough, that sometimes ping-ponging back and forth between
software versions shouldn't be a problem in practice.
Michael
--
Michael Haggerty
mhagger@alum.mit.edu
http://softwareswirl.blogspot.com/
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-30 9:47 ` Michael Haggerty
@ 2014-05-30 17:29 ` Jeff King
2014-05-31 10:47 ` Michael Haggerty
0 siblings, 1 reply; 17+ messages in thread
From: Jeff King @ 2014-05-30 17:29 UTC (permalink / raw)
To: Michael Haggerty
Cc: Duy Nguyen, Junio C Hamano, David Turner, Git Mailing List,
David Turner
On Fri, May 30, 2014 at 11:47:33AM +0200, Michael Haggerty wrote:
> > I could guess something like "the writer has a different idea of what a
> > valid refname is than we do". But that applies as well to (2), but just
> > as "the reader who wrote packed-refs.stat has a different idea than we
> > do".
>
> If we want to be robust to future changes to refname rules, we could add
> a header flag like
>
> # pack-refs with: peeled fully-peeled check-level=1.0
>
> which promises that the reference names in the file conform to the
> current ("version 1.0") check_refname_format() rules.
Yeah, I thought about mentioning something like that. But really, this
just seems like a lot of complexity to solve the problem in a wrong way.
It's not running check_refname_format that is the real problem. It's the
fact that we do O(# of refs) work whenever we have to access the
packed-refs file. check_refname_format is part of that, surely, but so
is reading the file, creating all of the refname structs in memory, etc.
I'd much rather see a solution that lets us do O(log N) or O(1) work to
access a ref, and then we don't have to care about optimizing
check_refname_format specifically.
I don't mind internal code speedups to micro-optimize check_refname_format.
They may make the code uglier, but they're fairly contained. But things
like check-level are much more invasive, and we'll need to keep
compatibility with them in future versions.
-Peff
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-30 17:29 ` Jeff King
@ 2014-05-31 10:47 ` Michael Haggerty
0 siblings, 0 replies; 17+ messages in thread
From: Michael Haggerty @ 2014-05-31 10:47 UTC (permalink / raw)
To: Jeff King
Cc: Duy Nguyen, Junio C Hamano, David Turner, Git Mailing List,
David Turner
On 05/30/2014 07:29 PM, Jeff King wrote:
> On Fri, May 30, 2014 at 11:47:33AM +0200, Michael Haggerty wrote:
>> [...]
>> If we want to be robust to future changes to refname rules, we could add
>> a header flag like
>>
>> # pack-refs with: peeled fully-peeled check-level=1.0
> [...]
> Yeah, I thought about mentioning something like that. But really, this
> just seems like a lot of complexity to solve the problem in a wrong way.
Yes, you are right.
Michael
--
Michael Haggerty
mhagger@alum.mit.edu
http://softwareswirl.blogspot.com/
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] check_refname_component: Optimize
2014-05-30 0:07 ` Jeff King
2014-05-30 2:03 ` Duy Nguyen
2014-05-30 9:47 ` Michael Haggerty
@ 2014-05-31 11:21 ` Duy Nguyen
2 siblings, 0 replies; 17+ messages in thread
From: Duy Nguyen @ 2014-05-31 11:21 UTC (permalink / raw)
To: Jeff King
Cc: Junio C Hamano, David Turner, Michael Haggerty, Git Mailing List,
David Turner
On Fri, May 30, 2014 at 7:07 AM, Jeff King <peff@peff.net> wrote:
> But then we're just trusting that the "trust me" flag on disk is
> correct. Why not just trust that packed-refs is correct in the first
> place?
I missed one thing in the first reply: because packed-refs is a plain
text file, the user could be tempted to edit it manually (I know I did
a few times for fast rename) and so it could not be trusted. If we
ignore this (and I think we can, it's not like we encourage people to
edit files in $GIT_DIR), then #3 and #4 are as good as #2.
>
> IOW, consider this progression of changes:
>
> 1. Check refname format when we read packed-refs (the current
> behavior).
>
> 2. Keep a separate file "packed-refs.stat" with stat information. If
> the packed-refs file matches that stat information, do not bother
> checking refname formats.
>
> 3. Put a flag in "packed-refs" that says "trust me, I'm valid". Check
> the refnames when it is generated.
>
> 4. Realize that we already check the refnames when we write it out.
> Don't bother writing "trust me, I'm valid"; readers can assume that
> it is.
>
> What is the scenario that option (2) protects against that options (3)
> and (4) do not?
--
Duy
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2014-05-31 11:21 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-05-28 19:57 [PATCH] check_refname_component: Optimize David Turner
2014-05-28 21:24 ` Junio C Hamano
2014-05-29 12:19 ` brian m. carlson
-- strict thread matches above, loose matches on Subject: below --
2014-05-28 21:04 [PATCH v2 0/1] " David Turner
2014-05-28 21:04 ` [PATCH] " David Turner
2014-05-28 21:44 ` Michael Haggerty
2014-05-28 23:49 ` David Turner
2014-05-29 13:41 ` Duy Nguyen
2014-05-29 16:36 ` Junio C Hamano
2014-05-29 23:24 ` Duy Nguyen
2014-05-29 23:41 ` Jeff King
2014-05-29 23:43 ` Duy Nguyen
2014-05-30 0:07 ` Jeff King
2014-05-30 2:03 ` Duy Nguyen
2014-05-30 9:47 ` Michael Haggerty
2014-05-30 17:29 ` Jeff King
2014-05-31 10:47 ` Michael Haggerty
2014-05-31 11:21 ` Duy Nguyen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).