git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Jeff King <peff@peff.net>
Cc: Martin Fick <mfick@codeaurora.org>,
	Junio C Hamano <gitster@pobox.com>,
	git@vger.kernel.org
Subject: [PATCH 1/3] add mergesort() for linked lists
Date: Sun, 01 Apr 2012 00:10:11 +0200	[thread overview]
Message-ID: <4F7780C3.2050408@lsrfire.ath.cx> (raw)
In-Reply-To: <20120330094052.GB12298@sigill.intra.peff.net>

This adds a generic bottom-up mergesort implementation for singly linked
lists.  It was inspired by Simon Tatham's webpage on the topic[1], but
not so much by his implementation -- for no good reason, really, just a
case of NIH.

[1] http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.html

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
As you can guess from the patch, I just love function pointers. :)  It
may be interesting to know how much overhead they add, but in the test
case you described (a ref for each revision of git.git) the call to
mergesort (wired up in patch 3) only contributes less than 1% of the cost
according to callgrind, including its callees.

WARNING!  This is fresh code, and while the algorithm is simple, it's
possible that I was still somehow able to sneak in a bug.

 .gitignore       |    1 +
 Makefile         |    3 +++
 mergesort.c      |   75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 mergesort.h      |    9 +++++++
 test-mergesort.c |   52 +++++++++++++++++++++++++++++++++++++
 5 files changed, 140 insertions(+)
 create mode 100644 mergesort.c
 create mode 100644 mergesort.h
 create mode 100644 test-mergesort.c

diff --git a/.gitignore b/.gitignore
index 87fcc5f..225656a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -180,6 +180,7 @@
 /test-index-version
 /test-line-buffer
 /test-match-trees
+/test-mergesort
 /test-mktemp
 /test-parse-options
 /test-path-utils
diff --git a/Makefile b/Makefile
index be1957a..b04d6ec 100644
--- a/Makefile
+++ b/Makefile
@@ -480,6 +480,7 @@ TEST_PROGRAMS_NEED_X += test-genrandom
 TEST_PROGRAMS_NEED_X += test-index-version
 TEST_PROGRAMS_NEED_X += test-line-buffer
 TEST_PROGRAMS_NEED_X += test-match-trees
+TEST_PROGRAMS_NEED_X += test-mergesort
 TEST_PROGRAMS_NEED_X += test-mktemp
 TEST_PROGRAMS_NEED_X += test-parse-options
 TEST_PROGRAMS_NEED_X += test-path-utils
@@ -590,6 +591,7 @@ LIB_H += log-tree.h
 LIB_H += mailmap.h
 LIB_H += merge-file.h
 LIB_H += merge-recursive.h
+LIB_H += mergesort.h
 LIB_H += notes.h
 LIB_H += notes-cache.h
 LIB_H += notes-merge.h
@@ -694,6 +696,7 @@ LIB_OBJS += mailmap.o
 LIB_OBJS += match-trees.o
 LIB_OBJS += merge-file.o
 LIB_OBJS += merge-recursive.o
+LIB_OBJS += mergesort.o
 LIB_OBJS += name-hash.o
 LIB_OBJS += notes.o
 LIB_OBJS += notes-cache.o
diff --git a/mergesort.c b/mergesort.c
new file mode 100644
index 0000000..c0f1874
--- /dev/null
+++ b/mergesort.c
@@ -0,0 +1,75 @@
+#include "cache.h"
+#include "mergesort.h"
+
+#include "commit.h"
+
+struct mergesort_sublist {
+	void *ptr;
+	unsigned long len;
+};
+
+static void *get_nth_next(void *list, unsigned long n,
+			  void *(*get_next_fn)(const void *))
+{
+	while (n-- && list)
+		list = get_next_fn(list);
+	return list;
+}
+
+static void *pop_item(struct mergesort_sublist *l,
+		      void *(*get_next_fn)(const void *))
+{
+	void *p = l->ptr;
+	l->ptr = get_next_fn(l->ptr);
+	l->len = l->ptr ? (l->len - 1) : 0;
+	return p;
+}
+
+void *mergesort(void *list,
+		void *(*get_next_fn)(const void *),
+		void (*set_next_fn)(void *, void *),
+		int (*compare_fn)(const void *, const void *))
+{
+	unsigned long l;
+
+	if (!list)
+		return NULL;
+	for (l = 1; ; l *= 2) {
+		void *curr;
+		struct mergesort_sublist p, q;
+
+		p.ptr = list;
+		q.ptr = get_nth_next(p.ptr, l, get_next_fn);
+		if (!q.ptr)
+			break;
+		p.len = q.len = l;
+
+		if (compare_fn(p.ptr, q.ptr) > 0)
+			list = curr = pop_item(&q, get_next_fn);
+		else
+			list = curr = pop_item(&p, get_next_fn);
+
+		while (p.ptr) {
+			while (p.len || q.len) {
+				void *prev = curr;
+
+				if (!p.len)
+					curr = pop_item(&q, get_next_fn);
+				else if (!q.len)
+					curr = pop_item(&p, get_next_fn);
+				else if (compare_fn(p.ptr, q.ptr) > 0)
+					curr = pop_item(&q, get_next_fn);
+				else
+					curr = pop_item(&p, get_next_fn);
+				set_next_fn(prev, curr);
+			}
+			p.ptr = q.ptr;
+			p.len = l;
+			q.ptr = get_nth_next(p.ptr, l, get_next_fn);
+			q.len = q.ptr ? l : 0;
+
+		}
+		set_next_fn(curr, NULL);
+	}
+	return list;
+}
diff --git a/mergesort.h b/mergesort.h
new file mode 100644
index 0000000..d6e5f4a
--- /dev/null
+++ b/mergesort.h
@@ -0,0 +1,9 @@
+#ifndef MERGESORT_H
+#define MERGESORT_H
+
+void *mergesort(void *list,
+		void *(*get_next_fn)(const void *),
+		void (*set_next_fn)(void *, void *),
+		int (*compare_fn)(const void *, const void *));
+
+#endif
diff --git a/test-mergesort.c b/test-mergesort.c
new file mode 100644
index 0000000..02441ab
--- /dev/null
+++ b/test-mergesort.c
@@ -0,0 +1,52 @@
+#include "cache.h"
+#include "mergesort.h"
+
+struct line {
+	char *text;
+	struct line *next;
+};
+
+static void *get_next(const void *a)
+{
+	return ((const struct line *)a)->next;
+}
+
+static void set_next(void *a, void *b)
+{
+	((struct line *)a)->next = b;
+}
+
+static int compare_strings(const void *a, const void *b)
+{
+	const struct line *x = a, *y = b;
+	return strcmp(x->text, y->text);
+}
+
+int main(int argc, const char **argv)
+{
+	struct line *line, *p = NULL, *lines = NULL;
+	struct strbuf sb = STRBUF_INIT;
+
+	for (;;) {
+		if (strbuf_getwholeline_fd(&sb, 0, '\n'))
+			break;
+		line = xmalloc(sizeof(struct line));
+		line->text = strbuf_detach(&sb, NULL);
+		if (p) {
+			line->next = p->next;
+			p->next = line;
+		} else {
+			line->next = NULL;
+			lines = line;
+		}
+		p = line;
+	}
+
+	lines = mergesort(lines, get_next, set_next, compare_strings);
+
+	while (lines) {
+		printf("%s", lines->text);
+		lines = lines->next;
+	}
+	return 0;
+}
-- 
1.7.9.2

  parent reply	other threads:[~2012-03-31 22:11 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-30  0:18 Git push performance problems with ~100K refs Martin Fick
2012-03-30  2:12 ` Junio C Hamano
2012-03-30  2:43   ` Martin Fick
2012-03-30  9:32     ` Jeff King
2012-03-30  9:40       ` Jeff King
2012-03-30 14:22         ` Martin Fick
2012-03-31 22:10         ` René Scharfe [this message]
2012-04-05 19:17           ` [PATCH 1/3] add mergesort() for linked lists Junio C Hamano
2012-04-08 20:32             ` René Scharfe
2012-04-09 18:26               ` Junio C Hamano
2012-04-11  6:19           ` Stephen Boyd
2012-04-11 16:44             ` Junio C Hamano
2012-03-31 22:10         ` [PATCH 2/3] commit: use mergesort() in commit_list_sort_by_date() René Scharfe
2012-03-31 22:11         ` [PATCH 3/3] revision: insert unsorted, then sort in prepare_revision_walk() René Scharfe
2012-03-31 22:36           ` Martin Fick
2012-03-31 23:45           ` Junio C Hamano
2012-04-02 16:24           ` Martin Fick
2012-04-02 16:39             ` Shawn Pearce
2012-04-02 16:49               ` Martin Fick
2012-04-02 16:51                 ` Shawn Pearce
2012-04-02 20:37                   ` Jeff King
2012-04-02 20:51                     ` Jeff King
2012-04-02 23:16                     ` Martin Fick
2012-04-03  3:49                     ` Nguyen Thai Ngoc Duy
2012-04-03  5:55                       ` Martin Fick
2012-04-03  6:55                         ` [PATCH 0/3] Commit cache Nguyễn Thái Ngọc Duy
2012-04-03  6:55                         ` [PATCH 1/3] parse_commit_buffer: rename a confusing variable name Nguyễn Thái Ngọc Duy
2012-04-03  6:55                         ` [PATCH 2/3] Add commit cache to help speed up commit traversal Nguyễn Thái Ngọc Duy
2012-04-03  6:55                         ` [PATCH 3/3] Add parse_commit_for_rev() to take advantage of sha1-cache Nguyễn Thái Ngọc Duy
2012-04-05 13:02                       ` [PATCH 3/3] revision: insert unsorted, then sort in prepare_revision_walk() Nguyen Thai Ngoc Duy
2012-04-06 19:21                         ` Shawn Pearce
2012-04-07  4:20                           ` Nguyen Thai Ngoc Duy
2012-04-03  3:44                   ` Nguyen Thai Ngoc Duy
2012-04-02 20:14           ` Jeff King
2012-04-02 22:54             ` René Scharfe
2012-04-03  8:40               ` Jeff King
2012-04-03  9:19                 ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F7780C3.2050408@lsrfire.ath.cx \
    --to=rene.scharfe@lsrfire.ath.cx \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=mfick@codeaurora.org \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).