- * [PATCH v2 1/5] compat: add qsort_s()
  2017-01-22 17:47 [PATCH v2 0/5] string-list: make string_list_sort() reentrant René Scharfe
@ 2017-01-22 17:51 ` René Scharfe
  2017-01-22 17:52 ` [PATCH v2 2/5] add QSORT_S René Scharfe
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: René Scharfe @ 2017-01-22 17:51 UTC (permalink / raw)
  To: Git List; +Cc: Jeff King, Junio C Hamano, Johannes Schindelin
The function qsort_s() was introduced with C11 Annex K; it provides the
ability to pass a context pointer to the comparison function, supports
the convention of using a NULL pointer for an empty array and performs a
few safety checks.
Add an implementation based on compat/qsort.c for platforms that lack a
native standards-compliant qsort_s() (i.e. basically everyone).  It
doesn't perform the full range of possible checks: It uses size_t
instead of rsize_t and doesn't check nmemb and size against RSIZE_MAX
because we probably don't have the restricted size type defined.  For
the same reason it returns int instead of errno_t.
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 Makefile          |  8 +++++++
 compat/qsort_s.c  | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 git-compat-util.h |  6 +++++
 3 files changed, 83 insertions(+)
 create mode 100644 compat/qsort_s.c
diff --git a/Makefile b/Makefile
index 27afd0f378..53ecc84e28 100644
--- a/Makefile
+++ b/Makefile
@@ -279,6 +279,9 @@ all::
 # is a simplified version of the merge sort used in glibc. This is
 # recommended if Git triggers O(n^2) behavior in your platform's qsort().
 #
+# Define HAVE_ISO_QSORT_S if your platform provides a qsort_s() that's
+# compatible with the one described in C11 Annex K.
+#
 # Define UNRELIABLE_FSTAT if your system's fstat does not return the same
 # information on a not yet closed file that lstat would return for the same
 # file after it was closed.
@@ -1418,6 +1421,11 @@ ifdef INTERNAL_QSORT
 	COMPAT_CFLAGS += -DINTERNAL_QSORT
 	COMPAT_OBJS += compat/qsort.o
 endif
+ifdef HAVE_ISO_QSORT_S
+	COMPAT_CFLAGS += -DHAVE_ISO_QSORT_S
+else
+	COMPAT_OBJS += compat/qsort_s.o
+endif
 ifdef RUNTIME_PREFIX
 	COMPAT_CFLAGS += -DRUNTIME_PREFIX
 endif
diff --git a/compat/qsort_s.c b/compat/qsort_s.c
new file mode 100644
index 0000000000..52d1f0a73d
--- /dev/null
+++ b/compat/qsort_s.c
@@ -0,0 +1,69 @@
+#include "../git-compat-util.h"
+
+/*
+ * A merge sort implementation, simplified from the qsort implementation
+ * by Mike Haertel, which is a part of the GNU C Library.
+ * Added context pointer, safety checks and return value.
+ */
+
+static void msort_with_tmp(void *b, size_t n, size_t s,
+			   int (*cmp)(const void *, const void *, void *),
+			   char *t, void *ctx)
+{
+	char *tmp;
+	char *b1, *b2;
+	size_t n1, n2;
+
+	if (n <= 1)
+		return;
+
+	n1 = n / 2;
+	n2 = n - n1;
+	b1 = b;
+	b2 = (char *)b + (n1 * s);
+
+	msort_with_tmp(b1, n1, s, cmp, t, ctx);
+	msort_with_tmp(b2, n2, s, cmp, t, ctx);
+
+	tmp = t;
+
+	while (n1 > 0 && n2 > 0) {
+		if (cmp(b1, b2, ctx) <= 0) {
+			memcpy(tmp, b1, s);
+			tmp += s;
+			b1 += s;
+			--n1;
+		} else {
+			memcpy(tmp, b2, s);
+			tmp += s;
+			b2 += s;
+			--n2;
+		}
+	}
+	if (n1 > 0)
+		memcpy(tmp, b1, n1 * s);
+	memcpy(b, t, (n - n2) * s);
+}
+
+int git_qsort_s(void *b, size_t n, size_t s,
+		int (*cmp)(const void *, const void *, void *), void *ctx)
+{
+	const size_t size = st_mult(n, s);
+	char buf[1024];
+
+	if (!n)
+		return 0;
+	if (!b || !cmp)
+		return -1;
+
+	if (size < sizeof(buf)) {
+		/* The temporary array fits on the small on-stack buffer. */
+		msort_with_tmp(b, n, s, cmp, buf, ctx);
+	} else {
+		/* It's somewhat large, so malloc it.  */
+		char *tmp = xmalloc(size);
+		msort_with_tmp(b, n, s, cmp, tmp, ctx);
+		free(tmp);
+	}
+	return 0;
+}
diff --git a/git-compat-util.h b/git-compat-util.h
index 87237b092b..f706744e6a 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -988,6 +988,12 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size,
 		qsort(base, nmemb, size, compar);
 }
 
+#ifndef HAVE_ISO_QSORT_S
+int git_qsort_s(void *base, size_t nmemb, size_t size,
+		int (*compar)(const void *, const void *, void *), void *ctx);
+#define qsort_s git_qsort_s
+#endif
+
 #ifndef REG_STARTEND
 #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd"
 #endif
-- 
2.11.0
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * [PATCH v2 2/5] add QSORT_S
  2017-01-22 17:47 [PATCH v2 0/5] string-list: make string_list_sort() reentrant René Scharfe
  2017-01-22 17:51 ` [PATCH v2 1/5] compat: add qsort_s() René Scharfe
@ 2017-01-22 17:52 ` René Scharfe
  2017-01-22 17:53 ` [PATCH v2 3/5] perf: add basic sort performance test René Scharfe
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: René Scharfe @ 2017-01-22 17:52 UTC (permalink / raw)
  To: Git List; +Cc: Jeff King, Junio C Hamano, Johannes Schindelin
Add the macro QSORT_S, a convenient wrapper for qsort_s() that infers
the size of the array elements and dies on error.
Basically all possible errors are programming mistakes (passing NULL as
base of a non-empty array, passing NULL as comparison function,
out-of-bounds accesses), so terminating the program should be acceptable
for most callers.
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 git-compat-util.h | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/git-compat-util.h b/git-compat-util.h
index f706744e6a..f46d40e4a4 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -994,6 +994,11 @@ int git_qsort_s(void *base, size_t nmemb, size_t size,
 #define qsort_s git_qsort_s
 #endif
 
+#define QSORT_S(base, n, compar, ctx) do {			\
+	if (qsort_s((base), (n), sizeof(*(base)), compar, ctx))	\
+		die("BUG: qsort_s() failed");			\
+} while (0)
+
 #ifndef REG_STARTEND
 #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd"
 #endif
-- 
2.11.0
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * [PATCH v2 3/5] perf: add basic sort performance test
  2017-01-22 17:47 [PATCH v2 0/5] string-list: make string_list_sort() reentrant René Scharfe
  2017-01-22 17:51 ` [PATCH v2 1/5] compat: add qsort_s() René Scharfe
  2017-01-22 17:52 ` [PATCH v2 2/5] add QSORT_S René Scharfe
@ 2017-01-22 17:53 ` René Scharfe
  2017-01-22 17:57 ` [PATCH v2 4/5] string-list: use QSORT_S in string_list_sort() René Scharfe
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: René Scharfe @ 2017-01-22 17:53 UTC (permalink / raw)
  To: Git List; +Cc: Jeff King, Junio C Hamano, Johannes Schindelin
Add a sort command to test-string-list that reads lines from stdin,
stores them in a string_list and then sorts it.  Use it in a simple
perf test script to measure the performance of string_list_sort().
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 t/helper/test-string-list.c | 25 +++++++++++++++++++++++++
 t/perf/p0071-sort.sh        | 26 ++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100755 t/perf/p0071-sort.sh
diff --git a/t/helper/test-string-list.c b/t/helper/test-string-list.c
index 4a68967bd1..c502fa16d3 100644
--- a/t/helper/test-string-list.c
+++ b/t/helper/test-string-list.c
@@ -97,6 +97,31 @@ int cmd_main(int argc, const char **argv)
 		return 0;
 	}
 
+	if (argc == 2 && !strcmp(argv[1], "sort")) {
+		struct string_list list = STRING_LIST_INIT_NODUP;
+		struct strbuf sb = STRBUF_INIT;
+		struct string_list_item *item;
+
+		strbuf_read(&sb, 0, 0);
+
+		/*
+		 * Split by newline, but don't create a string_list item
+		 * for the empty string after the last separator.
+		 */
+		if (sb.buf[sb.len - 1] == '\n')
+			strbuf_setlen(&sb, sb.len - 1);
+		string_list_split_in_place(&list, sb.buf, '\n', -1);
+
+		string_list_sort(&list);
+
+		for_each_string_list_item(item, &list)
+			puts(item->string);
+
+		string_list_clear(&list, 0);
+		strbuf_release(&sb);
+		return 0;
+	}
+
 	fprintf(stderr, "%s: unknown function name: %s\n", argv[0],
 		argv[1] ? argv[1] : "(there was none)");
 	return 1;
diff --git a/t/perf/p0071-sort.sh b/t/perf/p0071-sort.sh
new file mode 100755
index 0000000000..7c9a35a646
--- /dev/null
+++ b/t/perf/p0071-sort.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+test_description='Basic sort performance tests'
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+test_expect_success 'setup' '
+	git ls-files --stage "*.[ch]" "*.sh" |
+	cut -f2 -d" " |
+	git cat-file --batch >unsorted
+'
+
+test_perf 'sort(1)' '
+	sort <unsorted >expect
+'
+
+test_perf 'string_list_sort()' '
+	test-string-list sort <unsorted >actual
+'
+
+test_expect_success 'string_list_sort() sorts like sort(1)' '
+	test_cmp_bin expect actual
+'
+
+test_done
-- 
2.11.0
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * [PATCH v2 4/5] string-list: use QSORT_S in string_list_sort()
  2017-01-22 17:47 [PATCH v2 0/5] string-list: make string_list_sort() reentrant René Scharfe
                   ` (2 preceding siblings ...)
  2017-01-22 17:53 ` [PATCH v2 3/5] perf: add basic sort performance test René Scharfe
@ 2017-01-22 17:57 ` René Scharfe
  2017-01-22 17:58 ` [PATCH v2 5/5] ref-filter: use QSORT_S in ref_array_sort() René Scharfe
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: René Scharfe @ 2017-01-22 17:57 UTC (permalink / raw)
  To: Git List; +Cc: Jeff King, Junio C Hamano, Johannes Schindelin
Pass the comparison function to cmp_items() via the context parameter of
qsort_s() instead of using a global variable.  That allows calling
string_list_sort() from multiple parallel threads.
Our qsort_s() in compat/ is slightly slower than qsort(1) from glibc
2.24 for sorting lots of lines:
Test                         HEAD^             HEAD
---------------------------------------------------------------------
0071.2: sort(1)              0.10(0.22+0.01)   0.09(0.21+0.00) -10.0%
0071.3: string_list_sort()   0.16(0.15+0.01)   0.17(0.15+0.00) +6.3%
GNU sort(1) version 8.26 is significantly faster because it uses
multiple parallel threads; with the unportable option --parallel=1 it
becomes slower:
Test                         HEAD^             HEAD
--------------------------------------------------------------------
0071.2: sort(1)              0.21(0.18+0.01)   0.20(0.18+0.01) -4.8%
0071.3: string_list_sort()   0.16(0.13+0.02)   0.17(0.15+0.01) +6.3%
There is some instability -- the numbers for the sort(1) check shouldn't
be affected by this patch.  Anyway, the performance of our qsort_s()
implementation is apparently good enough, at least for this test.
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 string-list.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/string-list.c b/string-list.c
index 8c83cac189..45016ad86d 100644
--- a/string-list.c
+++ b/string-list.c
@@ -211,21 +211,18 @@ struct string_list_item *string_list_append(struct string_list *list,
 			list->strdup_strings ? xstrdup(string) : (char *)string);
 }
 
-/* Yuck */
-static compare_strings_fn compare_for_qsort;
-
-/* Only call this from inside string_list_sort! */
-static int cmp_items(const void *a, const void *b)
+static int cmp_items(const void *a, const void *b, void *ctx)
 {
+	compare_strings_fn cmp = ctx;
 	const struct string_list_item *one = a;
 	const struct string_list_item *two = b;
-	return compare_for_qsort(one->string, two->string);
+	return cmp(one->string, two->string);
 }
 
 void string_list_sort(struct string_list *list)
 {
-	compare_for_qsort = list->cmp ? list->cmp : strcmp;
-	QSORT(list->items, list->nr, cmp_items);
+	QSORT_S(list->items, list->nr, cmp_items,
+		list->cmp ? list->cmp : strcmp);
 }
 
 struct string_list_item *unsorted_string_list_lookup(struct string_list *list,
-- 
2.11.0
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * [PATCH v2 5/5] ref-filter: use QSORT_S in ref_array_sort()
  2017-01-22 17:47 [PATCH v2 0/5] string-list: make string_list_sort() reentrant René Scharfe
                   ` (3 preceding siblings ...)
  2017-01-22 17:57 ` [PATCH v2 4/5] string-list: use QSORT_S in string_list_sort() René Scharfe
@ 2017-01-22 17:58 ` René Scharfe
  2017-01-22 18:02 ` [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1) René Scharfe
  2017-01-23 23:54 ` [PATCH v2 0/5] string-list: make string_list_sort() reentrant Jeff King
  6 siblings, 0 replies; 18+ messages in thread
From: René Scharfe @ 2017-01-22 17:58 UTC (permalink / raw)
  To: Git List; +Cc: Jeff King, Junio C Hamano, Johannes Schindelin
Pass the array of sort keys to compare_refs() via the context parameter
of qsort_s() instead of using a global variable; that's cleaner and
simpler.  If ref_array_sort() is to be called from multiple parallel
threads then care still needs to be taken that the global variable
used_atom is not modified concurrently.
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
 ref-filter.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/ref-filter.c b/ref-filter.c
index 1a978405e6..3975022c88 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1589,8 +1589,7 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 	return (s->reverse) ? -cmp : cmp;
 }
 
-static struct ref_sorting *ref_sorting;
-static int compare_refs(const void *a_, const void *b_)
+static int compare_refs(const void *a_, const void *b_, void *ref_sorting)
 {
 	struct ref_array_item *a = *((struct ref_array_item **)a_);
 	struct ref_array_item *b = *((struct ref_array_item **)b_);
@@ -1606,8 +1605,7 @@ static int compare_refs(const void *a_, const void *b_)
 
 void ref_array_sort(struct ref_sorting *sorting, struct ref_array *array)
 {
-	ref_sorting = sorting;
-	QSORT(array->items, array->nr, compare_refs);
+	QSORT_S(array->items, array->nr, compare_refs, sorting);
 }
 
 static void append_literal(const char *cp, const char *ep, struct ref_formatting_state *state)
-- 
2.11.0
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
  2017-01-22 17:47 [PATCH v2 0/5] string-list: make string_list_sort() reentrant René Scharfe
                   ` (4 preceding siblings ...)
  2017-01-22 17:58 ` [PATCH v2 5/5] ref-filter: use QSORT_S in ref_array_sort() René Scharfe
@ 2017-01-22 18:02 ` René Scharfe
  2017-01-23 19:07   ` Junio C Hamano
  2017-01-23 23:54 ` [PATCH v2 0/5] string-list: make string_list_sort() reentrant Jeff King
  6 siblings, 1 reply; 18+ messages in thread
From: René Scharfe @ 2017-01-22 18:02 UTC (permalink / raw)
  To: Git List; +Cc: Jeff King, Junio C Hamano, Johannes Schindelin
Implement qsort_s() as a wrapper to the GNU version of qsort_r(1) and
use it on Linux.  Performance increases slightly:
Test                         HEAD^             HEAD
--------------------------------------------------------------------
0071.2: sort(1)              0.10(0.20+0.02)   0.10(0.21+0.01) +0.0%
0071.3: string_list_sort()   0.17(0.15+0.01)   0.16(0.15+0.01) -5.9%
Additionally the unstripped size of compat/qsort_s.o falls from 24576
to 16544 bytes in my build.
IMHO these savings aren't worth the increased complexity of having to
support two implementations.
---
 Makefile         |  6 ++++++
 compat/qsort_s.c | 18 ++++++++++++++++++
 config.mak.uname |  1 +
 3 files changed, 25 insertions(+)
diff --git a/Makefile b/Makefile
index 53ecc84e28..46db1c773f 100644
--- a/Makefile
+++ b/Makefile
@@ -282,6 +282,9 @@ all::
 # Define HAVE_ISO_QSORT_S if your platform provides a qsort_s() that's
 # compatible with the one described in C11 Annex K.
 #
+# Define HAVE_GNU_QSORT_R if your platform provides a qsort_r() that's
+# compatible with the one introduced with glibc 2.8.
+#
 # Define UNRELIABLE_FSTAT if your system's fstat does not return the same
 # information on a not yet closed file that lstat would return for the same
 # file after it was closed.
@@ -1426,6 +1429,9 @@ ifdef HAVE_ISO_QSORT_S
 else
 	COMPAT_OBJS += compat/qsort_s.o
 endif
+ifdef HAVE_GNU_QSORT_R
+	COMPAT_CFLAGS += -DHAVE_GNU_QSORT_R
+endif
 ifdef RUNTIME_PREFIX
 	COMPAT_CFLAGS += -DRUNTIME_PREFIX
 endif
diff --git a/compat/qsort_s.c b/compat/qsort_s.c
index 52d1f0a73d..763ee1faae 100644
--- a/compat/qsort_s.c
+++ b/compat/qsort_s.c
@@ -1,5 +1,21 @@
 #include "../git-compat-util.h"
 
+#if defined HAVE_GNU_QSORT_R
+
+int git_qsort_s(void *b, size_t n, size_t s,
+		int (*cmp)(const void *, const void *, void *), void *ctx)
+{
+	if (!n)
+		return 0;
+	if (!b || !cmp)
+		return -1;
+
+	qsort_r(b, n, s, cmp, ctx);
+	return 0;
+}
+
+#else
+
 /*
  * A merge sort implementation, simplified from the qsort implementation
  * by Mike Haertel, which is a part of the GNU C Library.
@@ -67,3 +83,5 @@ int git_qsort_s(void *b, size_t n, size_t s,
 	}
 	return 0;
 }
+
+#endif
diff --git a/config.mak.uname b/config.mak.uname
index 447f36ac2e..a1858f54ff 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -37,6 +37,7 @@ ifeq ($(uname_S),Linux)
 	NEEDS_LIBRT = YesPlease
 	HAVE_GETDELIM = YesPlease
 	SANE_TEXT_GREP=-a
+	HAVE_GNU_QSORT_R = YesPlease
 endif
 ifeq ($(uname_S),GNU/kFreeBSD)
 	HAVE_ALLOCA_H = YesPlease
-- 
2.11.0
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
  2017-01-22 18:02 ` [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1) René Scharfe
@ 2017-01-23 19:07   ` Junio C Hamano
  2017-01-24 18:00     ` René Scharfe
  0 siblings, 1 reply; 18+ messages in thread
From: Junio C Hamano @ 2017-01-23 19:07 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Jeff King, Johannes Schindelin
René Scharfe <l.s.r@web.de> writes:
> Implement qsort_s() as a wrapper to the GNU version of qsort_r(1) and
> use it on Linux.  Performance increases slightly:
>
> Test                         HEAD^             HEAD
> --------------------------------------------------------------------
> 0071.2: sort(1)              0.10(0.20+0.02)   0.10(0.21+0.01) +0.0%
> 0071.3: string_list_sort()   0.17(0.15+0.01)   0.16(0.15+0.01) -5.9%
>
> Additionally the unstripped size of compat/qsort_s.o falls from 24576
> to 16544 bytes in my build.
>
> IMHO these savings aren't worth the increased complexity of having to
> support two implementations.
I do worry about having to support more implementations in the
future that have different function signature for the comparison
callbacks, which will make things ugly, but this addition alone
doesn't look too bad to me.
Thanks.
> diff --git a/compat/qsort_s.c b/compat/qsort_s.c
> index 52d1f0a73d..763ee1faae 100644
> --- a/compat/qsort_s.c
> +++ b/compat/qsort_s.c
> @@ -1,5 +1,21 @@
>  #include "../git-compat-util.h"
>  
> +#if defined HAVE_GNU_QSORT_R
> +
> +int git_qsort_s(void *b, size_t n, size_t s,
> +		int (*cmp)(const void *, const void *, void *), void *ctx)
> +{
> +	if (!n)
> +		return 0;
> +	if (!b || !cmp)
> +		return -1;
> +
> +	qsort_r(b, n, s, cmp, ctx);
> +	return 0;
> +}
> +
> +#else
> +
>  /*
>   * A merge sort implementation, simplified from the qsort implementation
>   * by Mike Haertel, which is a part of the GNU C Library.
> @@ -67,3 +83,5 @@ int git_qsort_s(void *b, size_t n, size_t s,
>  	}
>  	return 0;
>  }
> +
> +#endif
> diff --git a/config.mak.uname b/config.mak.uname
> index 447f36ac2e..a1858f54ff 100644
> --- a/config.mak.uname
> +++ b/config.mak.uname
> @@ -37,6 +37,7 @@ ifeq ($(uname_S),Linux)
>  	NEEDS_LIBRT = YesPlease
>  	HAVE_GETDELIM = YesPlease
>  	SANE_TEXT_GREP=-a
> +	HAVE_GNU_QSORT_R = YesPlease
>  endif
>  ifeq ($(uname_S),GNU/kFreeBSD)
>  	HAVE_ALLOCA_H = YesPlease
^ permalink raw reply	[flat|nested] 18+ messages in thread
- * Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
  2017-01-23 19:07   ` Junio C Hamano
@ 2017-01-24 18:00     ` René Scharfe
  2017-01-24 20:39       ` Jeff King
  0 siblings, 1 reply; 18+ messages in thread
From: René Scharfe @ 2017-01-24 18:00 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git List, Jeff King, Johannes Schindelin
Am 23.01.2017 um 20:07 schrieb Junio C Hamano:
> René Scharfe <l.s.r@web.de> writes:
> 
>> Implement qsort_s() as a wrapper to the GNU version of qsort_r(1) and
>> use it on Linux.  Performance increases slightly:
>>
>> Test                         HEAD^             HEAD
>> --------------------------------------------------------------------
>> 0071.2: sort(1)              0.10(0.20+0.02)   0.10(0.21+0.01) +0.0%
>> 0071.3: string_list_sort()   0.17(0.15+0.01)   0.16(0.15+0.01) -5.9%
>>
>> Additionally the unstripped size of compat/qsort_s.o falls from 24576
>> to 16544 bytes in my build.
>>
>> IMHO these savings aren't worth the increased complexity of having to
>> support two implementations.
> 
> I do worry about having to support more implementations in the
> future that have different function signature for the comparison
> callbacks, which will make things ugly, but this addition alone
> doesn't look too bad to me.
It is unfair of me to show a 5% speedup and then recommend to not
include it. ;-)  That difference won't be measurable in real use cases
and the patch is not necessary.  This patch is simple, but the overall
complexity (incl. #ifdefs etc.) will be lower without it.
But here's another one, with even higher performance and with an even
bigger recommendation to not include it! :)  It veers off into another
direction: Parallel execution.  It requires thread-safe comparison
functions, which might surprise callers.  The value 1000 for the minimum
number of items before threading kicks in is just a guess, not based on
measurements.  So it's quite raw -- and I'm not sure why it's still a
bit slower than sort(1).
Test                         HEAD^             HEAD
---------------------------------------------------------------------
0071.2: sort(1)              0.10(0.18+0.03)   0.10(0.20+0.02) +0.0%
0071.3: string_list_sort()   0.17(0.14+0.02)   0.11(0.18+0.02) -35.3%
---
 compat/qsort_s.c | 76 ++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 58 insertions(+), 18 deletions(-)
diff --git a/compat/qsort_s.c b/compat/qsort_s.c
index 52d1f0a73d..1304a089af 100644
--- a/compat/qsort_s.c
+++ b/compat/qsort_s.c
@@ -1,4 +1,5 @@
 #include "../git-compat-util.h"
+#include "../thread-utils.h"
 
 /*
  * A merge sort implementation, simplified from the qsort implementation
@@ -6,29 +7,58 @@
  * Added context pointer, safety checks and return value.
  */
 
-static void msort_with_tmp(void *b, size_t n, size_t s,
-			   int (*cmp)(const void *, const void *, void *),
-			   char *t, void *ctx)
+#define MIN_ITEMS_FOR_THREAD 1000
+
+struct work {
+	int nr_threads;
+	void *base;
+	size_t nmemb;
+	size_t size;
+	char *tmp;
+	int (*cmp)(const void *, const void *, void *);
+	void *ctx;
+};
+
+static void *msort_with_tmp(void *work)
 {
+	struct work one, two, *w = work;
 	char *tmp;
 	char *b1, *b2;
 	size_t n1, n2;
+	size_t s, n;
 
-	if (n <= 1)
-		return;
+	if (w->nmemb <= 1)
+		return NULL;
 
-	n1 = n / 2;
-	n2 = n - n1;
-	b1 = b;
-	b2 = (char *)b + (n1 * s);
+	one = two = *w;
+	one.nr_threads /= 2;
+	two.nr_threads -= one.nr_threads;
+	n = one.nmemb;
+	s = one.size;
+	n1 = one.nmemb = n / 2;
+	n2 = two.nmemb = n - n1;
+	b1 = one.base;
+	b2 = two.base = b1 + n1 * s;
+	two.tmp += n1 * s;
 
-	msort_with_tmp(b1, n1, s, cmp, t, ctx);
-	msort_with_tmp(b2, n2, s, cmp, t, ctx);
+#ifndef NO_PTHREADS
+	if (one.nr_threads && n > MIN_ITEMS_FOR_THREAD) {
+		pthread_t thread;
+		int err = pthread_create(&thread, NULL, msort_with_tmp, &one);
+		msort_with_tmp(&two);
+		if (err || pthread_join(thread, NULL))
+			msort_with_tmp(&one);
+	} else
+#endif
+	{
+		msort_with_tmp(&one);
+		msort_with_tmp(&two);
+	}
 
-	tmp = t;
+	tmp = one.tmp;
 
 	while (n1 > 0 && n2 > 0) {
-		if (cmp(b1, b2, ctx) <= 0) {
+		if (one.cmp(b1, b2, one.ctx) <= 0) {
 			memcpy(tmp, b1, s);
 			tmp += s;
 			b1 += s;
@@ -42,7 +72,8 @@ static void msort_with_tmp(void *b, size_t n, size_t s,
 	}
 	if (n1 > 0)
 		memcpy(tmp, b1, n1 * s);
-	memcpy(b, t, (n - n2) * s);
+	memcpy(one.base, one.tmp, (n - n2) * s);
+	return NULL;
 }
 
 int git_qsort_s(void *b, size_t n, size_t s,
@@ -50,20 +81,29 @@ int git_qsort_s(void *b, size_t n, size_t s,
 {
 	const size_t size = st_mult(n, s);
 	char buf[1024];
+	struct work w;
 
 	if (!n)
 		return 0;
 	if (!b || !cmp)
 		return -1;
 
+	w.nr_threads = online_cpus();
+	w.base = b;
+	w.nmemb = n;
+	w.size = s;
+	w.cmp = cmp;
+	w.ctx = ctx;
+
 	if (size < sizeof(buf)) {
 		/* The temporary array fits on the small on-stack buffer. */
-		msort_with_tmp(b, n, s, cmp, buf, ctx);
+		w.tmp = buf;
 	} else {
 		/* It's somewhat large, so malloc it.  */
-		char *tmp = xmalloc(size);
-		msort_with_tmp(b, n, s, cmp, tmp, ctx);
-		free(tmp);
+		w.tmp = xmalloc(size);
 	}
+	msort_with_tmp(&w);
+	if (w.tmp != buf)
+		free(w.tmp);
 	return 0;
 }
-- 
2.11.0
^ permalink raw reply related	[flat|nested] 18+ messages in thread
- * Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
  2017-01-24 18:00     ` René Scharfe
@ 2017-01-24 20:39       ` Jeff King
  2017-01-25 18:43         ` René Scharfe
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff King @ 2017-01-24 20:39 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, Git List, Johannes Schindelin
On Tue, Jan 24, 2017 at 07:00:03PM +0100, René Scharfe wrote:
> > I do worry about having to support more implementations in the
> > future that have different function signature for the comparison
> > callbacks, which will make things ugly, but this addition alone
> > doesn't look too bad to me.
> 
> It is unfair of me to show a 5% speedup and then recommend to not
> include it. ;-)  That difference won't be measurable in real use cases
> and the patch is not necessary.  This patch is simple, but the overall
> complexity (incl. #ifdefs etc.) will be lower without it.
I care less about squeezing out the last few percent performance and
more that somebody libc qsort_r() might offer some other improvement.
For instance, it could sort in-place to lower memory use for some cases,
or do some clever thing that gives more than a few percent in the real
world (something like TimSort).
I don't know to what degree we should care about that.
> But here's another one, with even higher performance and with an even
> bigger recommendation to not include it! :)  It veers off into another
> direction: Parallel execution.  It requires thread-safe comparison
> functions, which might surprise callers.  The value 1000 for the minimum
> number of items before threading kicks in is just a guess, not based on
> measurements.  So it's quite raw -- and I'm not sure why it's still a
> bit slower than sort(1).
Fun, but probably insane for our not-very-threadsafe code base. :)
-Peff
^ permalink raw reply	[flat|nested] 18+ messages in thread 
- * Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
  2017-01-24 20:39       ` Jeff King
@ 2017-01-25 18:43         ` René Scharfe
  2017-01-25 18:51           ` Jeff King
  0 siblings, 1 reply; 18+ messages in thread
From: René Scharfe @ 2017-01-25 18:43 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Git List, Johannes Schindelin
Am 24.01.2017 um 21:39 schrieb Jeff King:
> On Tue, Jan 24, 2017 at 07:00:03PM +0100, René Scharfe wrote:
>
>>> I do worry about having to support more implementations in the
>>> future that have different function signature for the comparison
>>> callbacks, which will make things ugly, but this addition alone
>>> doesn't look too bad to me.
>>
>> It is unfair of me to show a 5% speedup and then recommend to not
>> include it. ;-)  That difference won't be measurable in real use cases
>> and the patch is not necessary.  This patch is simple, but the overall
>> complexity (incl. #ifdefs etc.) will be lower without it.
>
> I care less about squeezing out the last few percent performance and
> more that somebody libc qsort_r() might offer some other improvement.
> For instance, it could sort in-place to lower memory use for some cases,
> or do some clever thing that gives more than a few percent in the real
> world (something like TimSort).
>
> I don't know to what degree we should care about that.
That's a good question.  Which workloads spend a significant amount of 
time in qsort/qsort_s?
We could track processor time spent and memory allocated in QSORT_S and 
the whole program and show a warning at the end if one of the two 
exceeded, say, 5% of the total, asking nicely to send it to our mailing 
list.  Would something like this be useful for other functions or 
metrics as well?  Would it be too impolite to use users as telemetry 
transports?
If we find such cases then we'd better fix them for all platforms, e.g. 
by importing timsort, no?
René
^ permalink raw reply	[flat|nested] 18+ messages in thread 
- * Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
  2017-01-25 18:43         ` René Scharfe
@ 2017-01-25 18:51           ` Jeff King
  2017-01-26 13:49             ` Johannes Schindelin
  0 siblings, 1 reply; 18+ messages in thread
From: Jeff King @ 2017-01-25 18:51 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, Git List, Johannes Schindelin
On Wed, Jan 25, 2017 at 07:43:01PM +0100, René Scharfe wrote:
> We could track processor time spent and memory allocated in QSORT_S and the
> whole program and show a warning at the end if one of the two exceeded, say,
> 5% of the total, asking nicely to send it to our mailing list.  Would
> something like this be useful for other functions or metrics as well?  Would
> it be too impolite to use users as telemetry transports?
Frankly, that sounds a bit overboard to me. If people want to profile
there are profiling tools. If we want users to profile on their systems
and send results to us, I think I'd rather give them instructions or a
wrapper script for doing so.
> If we find such cases then we'd better fix them for all platforms, e.g. by
> importing timsort, no?
Yes, as long as they are strict improvements. I can't think of a case
where some sorting behavior is a matter of opinion, and they'd prefer
Git behave like the rest of their system rather than like Git on other
systems[1].
-Peff
[1] I wonder the same about regex implementations. I generally consider
    our GNU regex fallback to be a strict improvement over system
    versions, at least in terms of features. But I was interested to see
    recently that musl implements pcre-style "\x" as an extension, but
    it _doesn't_ do REG_STARTEND. So it's at tradeoff. They have to use
    the fallback on newer versions of git (to get REG_STARTEND), but
    that means the rest of their system understands "\x" but Git does
    not.
^ permalink raw reply	[flat|nested] 18+ messages in thread
- * Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
  2017-01-25 18:51           ` Jeff King
@ 2017-01-26 13:49             ` Johannes Schindelin
  0 siblings, 0 replies; 18+ messages in thread
From: Johannes Schindelin @ 2017-01-26 13:49 UTC (permalink / raw)
  To: Jeff King; +Cc: René Scharfe, Junio C Hamano, Git List
[-- Attachment #1: Type: text/plain, Size: 425 bytes --]
Hi,
On Wed, 25 Jan 2017, Jeff King wrote:
> On Wed, Jan 25, 2017 at 07:43:01PM +0100, René Scharfe wrote:
> 
> > If we find such cases then we'd better fix them for all platforms, e.g. by
> > importing timsort, no?
> 
> Yes, as long as they are strict improvements.
I think in many cases, we may be better off by replacing the use of a
string-list for lookups by hashmaps for the same purpose.
Ciao,
Dscho
^ permalink raw reply	[flat|nested] 18+ messages in thread 
 
 
 
 
 
 
- * Re: [PATCH v2 0/5] string-list: make string_list_sort() reentrant
  2017-01-22 17:47 [PATCH v2 0/5] string-list: make string_list_sort() reentrant René Scharfe
                   ` (5 preceding siblings ...)
  2017-01-22 18:02 ` [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1) René Scharfe
@ 2017-01-23 23:54 ` Jeff King
  2017-01-24 11:44   ` Johannes Schindelin
  2017-01-24 18:00   ` René Scharfe
  6 siblings, 2 replies; 18+ messages in thread
From: Jeff King @ 2017-01-23 23:54 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano, Johannes Schindelin
On Sun, Jan 22, 2017 at 06:47:18PM +0100, René Scharfe wrote:
> Use qsort_s() from C11 Annex K to make string_list_sort() safer, in
> particular when called from parallel threads.
> 
> Changes from v1:
> * Renamed HAVE_QSORT_S to HAVE_ISO_QSORT_S in Makefile to disambiguate.
> * Added basic perf test (patch 3).
> * Converted a second caller to QSORT_S, in ref-filter.c (patch 5).
This looks nicely done overall, and I appreciate the perf test.
The speed looks like a reasonable outcome. I'm torn on the qsort_r()
demo patch. I don't think it looks too bad. OTOH, I don't think I would
want to deal with the opposite-argument-order versions.
Is there any interest in people adding the ISO qsort_s() to their libc
implementations? It seems like it's been a fair number of years by now.
-Peff
^ permalink raw reply	[flat|nested] 18+ messages in thread
- * Re: [PATCH v2 0/5] string-list: make string_list_sort() reentrant
  2017-01-23 23:54 ` [PATCH v2 0/5] string-list: make string_list_sort() reentrant Jeff King
@ 2017-01-24 11:44   ` Johannes Schindelin
  2017-01-24 13:44     ` Jeff King
  2017-01-24 18:00   ` René Scharfe
  1 sibling, 1 reply; 18+ messages in thread
From: Johannes Schindelin @ 2017-01-24 11:44 UTC (permalink / raw)
  To: Jeff King; +Cc: René Scharfe, Git List, Junio C Hamano
[-- Attachment #1: Type: text/plain, Size: 522 bytes --]
Hi Peff,
On Mon, 23 Jan 2017, Jeff King wrote:
> Is there any interest in people adding the ISO qsort_s() to their libc
> implementations? It seems like it's been a fair number of years by now.
Visual C supports it *at least* since Visual Studio 2005:
https://msdn.microsoft.com/en-us/library/4xc60xas(v=vs.80).aspx
With René's patch, we have an adapter for GNU libc, and if anybody comes
up with the (equally trivial) adapter for BSD libc's qsort_r(), we have a
lot of bases covered.
Ciao,
Johannes
^ permalink raw reply	[flat|nested] 18+ messages in thread 
- * Re: [PATCH v2 0/5] string-list: make string_list_sort() reentrant
  2017-01-24 11:44   ` Johannes Schindelin
@ 2017-01-24 13:44     ` Jeff King
  0 siblings, 0 replies; 18+ messages in thread
From: Jeff King @ 2017-01-24 13:44 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: René Scharfe, Git List, Junio C Hamano
On Tue, Jan 24, 2017 at 12:44:10PM +0100, Johannes Schindelin wrote:
> Hi Peff,
> 
> On Mon, 23 Jan 2017, Jeff King wrote:
> 
> > Is there any interest in people adding the ISO qsort_s() to their libc
> > implementations? It seems like it's been a fair number of years by now.
> 
> Visual C supports it *at least* since Visual Studio 2005:
> 
> https://msdn.microsoft.com/en-us/library/4xc60xas(v=vs.80).aspx
> 
> With René's patch, we have an adapter for GNU libc, and if anybody comes
> up with the (equally trivial) adapter for BSD libc's qsort_r(), we have a
> lot of bases covered.
Sadly, no. Microsoft's qsort_s() is not compatible with the ISO C one.
And BSD's qsort_r() has a similar problem acting as a wrapper, because
the order of arguments in the callback functions is different (so you'd
have to actually wrap the callback, too, and rearrange the arguments for
each call, or do some macro trickery).
Gory details are in:
  https://stackoverflow.com/questions/39560773/different-declarations-of-qsort-r-on-mac-and-linux/39561369
and the original thread:
  http://public-inbox.org/git/3083fbf7-d67e-77e4-e05f-94a7e7e15eba@web.de/
-Peff
^ permalink raw reply	[flat|nested] 18+ messages in thread 
 
- * Re: [PATCH v2 0/5] string-list: make string_list_sort() reentrant
  2017-01-23 23:54 ` [PATCH v2 0/5] string-list: make string_list_sort() reentrant Jeff King
  2017-01-24 11:44   ` Johannes Schindelin
@ 2017-01-24 18:00   ` René Scharfe
  2017-01-24 20:41     ` Jeff King
  1 sibling, 1 reply; 18+ messages in thread
From: René Scharfe @ 2017-01-24 18:00 UTC (permalink / raw)
  To: Jeff King; +Cc: Git List, Junio C Hamano, Johannes Schindelin
Am 24.01.2017 um 00:54 schrieb Jeff King:
> The speed looks like a reasonable outcome. I'm torn on the qsort_r()
> demo patch. I don't think it looks too bad. OTOH, I don't think I would
> want to deal with the opposite-argument-order versions.
The code itself may look OK, but it's not really necessary and the 
special implementation for Linux makes increases maintenance costs.  Can 
we save it for later and first give the common implemention a chance to 
prove itself?
> Is there any interest in people adding the ISO qsort_s() to their libc
> implementations? It seems like it's been a fair number of years by now.
https://sourceware.org/ml/libc-alpha/2014-12/msg00513.html is the last 
post mentioning qsort_s on the glibc mailing list, but it didn't even 
make it into https://sourceware.org/glibc/wiki/Development_Todo/Master.
Not sure what's planned in BSD land, didn't find anything (but didn't 
look too hard).
René
^ permalink raw reply	[flat|nested] 18+ messages in thread 
- * Re: [PATCH v2 0/5] string-list: make string_list_sort() reentrant
  2017-01-24 18:00   ` René Scharfe
@ 2017-01-24 20:41     ` Jeff King
  0 siblings, 0 replies; 18+ messages in thread
From: Jeff King @ 2017-01-24 20:41 UTC (permalink / raw)
  To: René Scharfe; +Cc: Git List, Junio C Hamano, Johannes Schindelin
On Tue, Jan 24, 2017 at 07:00:07PM +0100, René Scharfe wrote:
> Am 24.01.2017 um 00:54 schrieb Jeff King:
> > The speed looks like a reasonable outcome. I'm torn on the qsort_r()
> > demo patch. I don't think it looks too bad. OTOH, I don't think I would
> > want to deal with the opposite-argument-order versions.
> 
> The code itself may look OK, but it's not really necessary and the special
> implementation for Linux makes increases maintenance costs.  Can we save it
> for later and first give the common implemention a chance to prove itself?
Sure, I'm OK with leaving it out for now.
> > Is there any interest in people adding the ISO qsort_s() to their libc
> > implementations? It seems like it's been a fair number of years by now.
> 
> https://sourceware.org/ml/libc-alpha/2014-12/msg00513.html is the last post
> mentioning qsort_s on the glibc mailing list, but it didn't even make it
> into https://sourceware.org/glibc/wiki/Development_Todo/Master.
> Not sure what's planned in BSD land, didn't find anything (but didn't look
> too hard).
So it sounds like "no, not really". I think that's OK. I was mostly
curious if we could expect our custom implementation to age out over
time.
-Peff
^ permalink raw reply	[flat|nested] 18+ messages in thread