git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Junio C Hamano <gitster@pobox.com>
Cc: Jeff King <peff@peff.net>, Martin Fick <mfick@codeaurora.org>,
	git@vger.kernel.org
Subject: Re: [PATCH 1/3] add mergesort() for linked lists
Date: Sun, 08 Apr 2012 22:32:38 +0200	[thread overview]
Message-ID: <4F81F5E6.2070609@lsrfire.ath.cx> (raw)
In-Reply-To: <7vpqbm56pf.fsf@alter.siamese.dyndns.org>

Am 05.04.2012 21:17, schrieb Junio C Hamano:
> After seeing "I wrote it myself due to NIH", it strikes me a bit odd that
> you still used "start from bunch of singleton sublist, elongating twice
> per round as we go" structure from the original.

It's just becasue the dumb bottom-up approach is the most simple way to
implement merge sort.

> I wonder if it would be an improvement if you structured the loop so that:
> 
>   (1) the first sublist 'p' grabs as many elements in the ascending order
>       as you find;
> 
>   (2) the second sublist 'q' begins at the end of the first sublist and
>       grabs as many elements in the ascending order;
> 
>   (3) 'p' and 'q' are merge-sorted into the result list;
> 
>   (4) if your two sublists did not cover "list" in its entirety, process
>       the remainder (i.e. where the second sublist stopped because of an
>       unordered element) by going back to step (1); and
> 
>   (5) if you did not need to jump back to step (1) from step (4), then you
>       had only two sublists (or less), so the result is sorted.  Otherwise,
>       the result now has fewer ascending sublists than the original, so go
>       back to (1) and iterate.
> 
> If the input is in a random order, this may end up doing the same number
> of iterations as the original, but if the input is mostly sorted, wouldn't
> it allow us to take advantage of the fact by starting with a longer
> sublist in the earlier rounds?

This optimization speeds up the pre-sorted case but slows down the case of
a reversed pre-sorted list because we have to determine the length of the
sublists each time, while the dumb implementation already knows it.  I
didn't measure a significant difference for Jeff's test case.  Here's my
attempt at an implementation, for reference.

---
 mergesort.c |   61 +++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/mergesort.c b/mergesort.c
index c0f1874..3a61b9b 100644
--- a/mergesort.c
+++ b/mergesort.c
@@ -8,12 +8,37 @@ struct mergesort_sublist {
 	unsigned long len;
 };
 
-static void *get_nth_next(void *list, unsigned long n,
-			  void *(*get_next_fn)(const void *))
+static unsigned long run_length(const void *list,
+				struct mergesort_sublist *next_list,
+				void *(*get_next_fn)(const void *),
+				int (*compare_fn)(const void *, const void *))
 {
-	while (n-- && list)
-		list = get_next_fn(list);
-	return list;
+	unsigned long len = 1;
+
+	if (!list)
+		return 0;
+	for (;;) {
+		void *next = get_next_fn(list);
+
+		if (!next || compare_fn(list, next) > 0) {
+			if (next_list)
+				next_list->ptr = next;
+			break;
+		}
+		list = next;
+		len++;
+	}
+	return len;
+}
+
+static void set_next_pair(struct mergesort_sublist *p,
+			  struct mergesort_sublist *q, void *list,
+			  void *(*get_next_fn)(const void *),
+			  int (*compare_fn)(const void *, const void *))
+{
+	p->ptr = list;
+	p->len = run_length(p->ptr, q, get_next_fn, compare_fn);
+	q->len = q->ptr ? run_length(q->ptr, NULL, get_next_fn, compare_fn) : 0;
 }
 
 static void *pop_item(struct mergesort_sublist *l,
@@ -30,24 +55,16 @@ void *mergesort(void *list,
 		void (*set_next_fn)(void *, void *),
 		int (*compare_fn)(const void *, const void *))
 {
-	unsigned long l;
-
 	if (!list)
 		return NULL;
-	for (l = 1; ; l *= 2) {
+	for (;;) {
 		void *curr;
 		struct mergesort_sublist p, q;
 
-		p.ptr = list;
-		q.ptr = get_nth_next(p.ptr, l, get_next_fn);
+		set_next_pair(&p, &q, list, get_next_fn, compare_fn);
 		if (!q.ptr)
 			break;
-		p.len = q.len = l;
-
-		if (compare_fn(p.ptr, q.ptr) > 0)
-			list = curr = pop_item(&q, get_next_fn);
-		else
-			list = curr = pop_item(&p, get_next_fn);
+		list = curr = pop_item(&q, get_next_fn);
 
 		while (p.ptr) {
 			while (p.len || q.len) {
@@ -63,10 +80,14 @@ void *mergesort(void *list,
 					curr = pop_item(&p, get_next_fn);
 				set_next_fn(prev, curr);
 			}
-			p.ptr = q.ptr;
-			p.len = l;
-			q.ptr = get_nth_next(p.ptr, l, get_next_fn);
-			q.len = q.ptr ? l : 0;
+
+			set_next_pair(&p, &q, q.ptr, get_next_fn, compare_fn);
+			if (q.ptr) {
+				void *prev = curr;
+
+				curr = pop_item(&q, get_next_fn);
+				set_next_fn(prev, curr);
+			}
 
 		}
 		set_next_fn(curr, NULL);
-- 
1.7.10

  reply	other threads:[~2012-04-08 20:33 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-30  0:18 Git push performance problems with ~100K refs Martin Fick
2012-03-30  2:12 ` Junio C Hamano
2012-03-30  2:43   ` Martin Fick
2012-03-30  9:32     ` Jeff King
2012-03-30  9:40       ` Jeff King
2012-03-30 14:22         ` Martin Fick
2012-03-31 22:10         ` [PATCH 1/3] add mergesort() for linked lists René Scharfe
2012-04-05 19:17           ` Junio C Hamano
2012-04-08 20:32             ` René Scharfe [this message]
2012-04-09 18:26               ` Junio C Hamano
2012-04-11  6:19           ` Stephen Boyd
2012-04-11 16:44             ` Junio C Hamano
2012-03-31 22:10         ` [PATCH 2/3] commit: use mergesort() in commit_list_sort_by_date() René Scharfe
2012-03-31 22:11         ` [PATCH 3/3] revision: insert unsorted, then sort in prepare_revision_walk() René Scharfe
2012-03-31 22:36           ` Martin Fick
2012-03-31 23:45           ` Junio C Hamano
2012-04-02 16:24           ` Martin Fick
2012-04-02 16:39             ` Shawn Pearce
2012-04-02 16:49               ` Martin Fick
2012-04-02 16:51                 ` Shawn Pearce
2012-04-02 20:37                   ` Jeff King
2012-04-02 20:51                     ` Jeff King
2012-04-02 23:16                     ` Martin Fick
2012-04-03  3:49                     ` Nguyen Thai Ngoc Duy
2012-04-03  5:55                       ` Martin Fick
2012-04-03  6:55                         ` [PATCH 0/3] Commit cache Nguyễn Thái Ngọc Duy
2012-04-03  6:55                         ` [PATCH 1/3] parse_commit_buffer: rename a confusing variable name Nguyễn Thái Ngọc Duy
2012-04-03  6:55                         ` [PATCH 2/3] Add commit cache to help speed up commit traversal Nguyễn Thái Ngọc Duy
2012-04-03  6:55                         ` [PATCH 3/3] Add parse_commit_for_rev() to take advantage of sha1-cache Nguyễn Thái Ngọc Duy
2012-04-05 13:02                       ` [PATCH 3/3] revision: insert unsorted, then sort in prepare_revision_walk() Nguyen Thai Ngoc Duy
2012-04-06 19:21                         ` Shawn Pearce
2012-04-07  4:20                           ` Nguyen Thai Ngoc Duy
2012-04-03  3:44                   ` Nguyen Thai Ngoc Duy
2012-04-02 20:14           ` Jeff King
2012-04-02 22:54             ` René Scharfe
2012-04-03  8:40               ` Jeff King
2012-04-03  9:19                 ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F81F5E6.2070609@lsrfire.ath.cx \
    --to=rene.scharfe@lsrfire.ath.cx \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=mfick@codeaurora.org \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).