git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] compat: Add simplified merge sort implementation from glibc
@ 2008-02-03  1:11 Brian Downing
       [not found] ` <20080203011130.GK26392-oU/tDdhfGLReoWH0uzbU5w@public.gmane.org>
  2008-02-04  0:05 ` Edgar Toernig
  0 siblings, 2 replies; 11+ messages in thread
From: Brian Downing @ 2008-02-03  1:11 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Steffen Prohaska, git-u79uwXL29TY76Z2rM5mHXA,
	msysgit-/JYPxA39Uh5TLH3MbocFFw


qsort in Windows 2000 (and possibly other older Windows' C libraries)
is a Quicksort with the usual O(n^2) worst case.  Unfortunately, sorting
Git trees seems to get very close to that worst case quite often:

    $ /git/gitbad runstatus
    # On branch master
    qsort, nmemb = 30842
    done, 237838087 comparisons.

This patch adds a simplified version of the merge sort that is glibc's
qsort(3).  As a merge sort, this needs a temporary array equal in size
to the array that is to be sorted.

The complexity that was removed is:

* Doing direct stores for word-size and -aligned data.
* Falling back to quicksort if the allocation required to perform the
  merge sort would likely push the machine into swap.

Even with these simplifications, this seems to outperform the Windows
qsort(3) implementation, even in Windows XP (where it is "fixed" and
doesn't trigger O(n^2) complexity on trees).

[jes: moved into compat/qsort.c, as per Johannes Sixt's suggestion]

Signed-off-by: Brian Downing <bdowning-oU/tDdhfGLReoWH0uzbU5w@public.gmane.org>
Signed-off-by: Steffen Prohaska <prohaska-wjoc1KHpMeg@public.gmane.org>
Signed-off-by: Johannes Schindelin <johannes.schindelin-Mmb7MZpHnFY@public.gmane.org>
---
   Junio,

   This is for consideration for mainline Git now that 1.5.4 is out.  It
   is used to avoid an awful qsort implementation on Windows 2000, and I
   believe there was some discussion about other Unixes (AIX, etc) that
   have a similar problem.

   -bcd

 Makefile          |    7 ++++++
 compat/qsort.c    |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 git-compat-util.h |    6 +++++
 3 files changed, 73 insertions(+), 0 deletions(-)
 create mode 100644 compat/qsort.c

diff --git a/Makefile b/Makefile
index 92341c4..1698bc4 100644
--- a/Makefile
+++ b/Makefile
@@ -137,6 +137,9 @@ all::
 # Define THREADED_DELTA_SEARCH if you have pthreads and wish to exploit
 # parallel delta searching when packing objects.
 #
+# Define NEEDS_QUICK_QSORT if your qsort() implementation has O(n^2)
+# worst case complexity.
+#
 
 GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE
 	@$(SHELL_PATH) ./GIT-VERSION-GEN
@@ -722,6 +725,10 @@ ifdef NO_MEMMEM
 	COMPAT_CFLAGS += -DNO_MEMMEM
 	COMPAT_OBJS += compat/memmem.o
 endif
+ifdef NEEDS_QUICK_QSORT
+	COMPAT_CFLAGS += -DNEEDS_QUICK_QSORT
+	COMPAT_OBJS += compat/qsort.o
+endif
 
 ifdef THREADED_DELTA_SEARCH
 	BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH
diff --git a/compat/qsort.c b/compat/qsort.c
new file mode 100644
index 0000000..734866e
--- /dev/null
+++ b/compat/qsort.c
@@ -0,0 +1,60 @@
+#include "../git-compat-util.h"
+
+/* This merge sort implementation is simplified from glibc's. */
+static void msort_with_tmp(void *b, size_t n, size_t s,
+			   int (*cmp)(const void *, const void *),
+			   char *t)
+{
+	char *tmp;
+	char *b1, *b2;
+	size_t n1, n2;
+
+	if (n <= 1)
+		return;
+
+	n1 = n / 2;
+	n2 = n - n1;
+	b1 = b;
+	b2 = (char *)b + (n1 * s);
+
+	msort_with_tmp(b1, n1, s, cmp, t);
+	msort_with_tmp(b2, n2, s, cmp, t);
+
+	tmp = t;
+
+	while (n1 > 0 && n2 > 0) {
+		if (cmp(b1, b2) <= 0) {
+			memcpy(tmp, b1, s);
+			tmp += s;
+			b1 += s;
+			--n1;
+		} else {
+			memcpy(tmp, b2, s);
+			tmp += s;
+			b2 += s;
+			--n2;
+		}
+	}
+	if (n1 > 0)
+		memcpy(tmp, b1, n1 * s);
+	memcpy(b, t, (n - n2) * s);
+}
+
+void git_qsort(void *b, size_t n, size_t s,
+	       int (*cmp)(const void *, const void *))
+{
+	const size_t size = n * s;
+
+	if (size < 1024) {
+		char buf[size]; /* gcc-ism */
+
+		/* The temporary array is small, so put it on
+		   the stack.  */
+		msort_with_tmp(b, n, s, cmp, buf);
+	} else {
+		/* It's somewhat large, so malloc it.  */
+		char *tmp = malloc(size);
+		msort_with_tmp(b, n, s, cmp, tmp);
+		free(tmp);
+	}
+}
diff --git a/git-compat-util.h b/git-compat-util.h
index 4df90cb..e848a73 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -426,4 +426,10 @@ static inline int strtol_i(char const *s, int base, int *result)
 	return 0;
 }
 
+#ifdef NEEDS_QUICK_QSORT
+void git_qsort(void *base, size_t nmemb, size_t size,
+	       int(*compar)(const void *, const void *));
+#define qsort git_qsort
+#endif
+
 #endif
-- 
1.5.4.rc3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-02-05 21:08 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-02-03  1:11 [PATCH] compat: Add simplified merge sort implementation from glibc Brian Downing
     [not found] ` <20080203011130.GK26392-oU/tDdhfGLReoWH0uzbU5w@public.gmane.org>
2008-02-03  2:37   ` Johannes Schindelin
2008-02-03  4:50     ` Brian Downing
     [not found]       ` <20080203045033.GL26392-oU/tDdhfGLReoWH0uzbU5w@public.gmane.org>
2008-02-03 21:09         ` Johannes Schindelin
2008-02-05 10:19         ` Mike Ralphson
2008-02-03  6:22     ` Junio C Hamano
2008-02-04  0:05 ` Edgar Toernig
     [not found]   ` <20080204010552.03541642.froese-Mmb7MZpHnFY@public.gmane.org>
2008-02-04  2:46     ` Brian Downing
     [not found]       ` <1202209509-13760-1-git-send-email-bdowning@lavos.net>
2008-02-05 14:11         ` Brian Downing
2008-02-05 20:02           ` Junio C Hamano
2008-02-05 21:07             ` Brian Downing

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).