git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/1] Add a topological sort procedure to commit.c
@ 2005-06-30  5:58 Jon Seymour
  2005-06-30  6:52 ` Junio C Hamano
  0 siblings, 1 reply; 5+ messages in thread
From: Jon Seymour @ 2005-06-30  5:58 UTC (permalink / raw)
  To: git; +Cc: torvalds, jon.seymour


This patch introduces an in-place topological sort procedure to commit.c

Given a list of commits, sort_in_topological_order() will perform an in-place
topological sort of that list.

The invariant that applies to the resulting list is:

	a reachable from b => ord(b) < ord(a)

This invariant is weaker than the --merge-order invariant, but is cheaper
to calculate (assuming the list has been identified) and will serve any
purpose where only a minimal topological order guarantee is required.

Signed-off-by: Jon Seymour <jon.seymour@gmail.com>
---
Note: this patch currently has no observable consequences since nothing
in this patch calls it. A future patch will use this algorithm to provide
an O(n) bisection algorithm as a suggested replacement for the
existing O(n^2) bisection algorithm.
---

 commit.c |   82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 commit.h |    5 ++++
 2 files changed, 87 insertions(+), 0 deletions(-)

9beb76a20443b8d5ee6010bf0bd55344e5b39546
diff --git a/commit.c b/commit.c
--- a/commit.c
+++ b/commit.c
@@ -3,6 +3,12 @@
 #include "commit.h"
 #include "cache.h"
 
+struct sort_node
+{
+	unsigned int indegree;
+	struct commit_list * list_item;
+};
+
 const char *commit_type = "commit";
 
 enum cmit_fmt get_commit_format(const char *arg)
@@ -346,3 +352,79 @@ int count_parents(struct commit * commit
         return count;
 }
 
+/*
+ * Performs an in-place topological sort on the list supplied
+ */
+void sort_in_topological_order(struct commit_list ** list)
+{
+	struct commit_list * next = *list;
+	struct commit_list * work = NULL;
+	struct commit_list ** pptr = list;
+	struct sort_node * nodes;
+	struct sort_node * next_nodes;
+	int count = 0;
+
+	/* determine the size of the list */
+	while (next) {
+		next = next->next;
+		count++;
+	}
+	/* allocate an array to help sort the list */
+	nodes = xmalloc(sizeof(*nodes) * count);
+	/* link the list to the array */
+	next_nodes = nodes;
+	next=*list;
+	while (next) {
+		next_nodes->list_item = next;
+		next->item->object.util = next_nodes;
+		next_nodes++;
+		next = next->next;
+	}
+	/* update the indegree */
+	next=*list;
+	while (next) {
+		struct commit_list * parents = next->item->parents;
+		while (parents) {
+			struct commit * parent=parents->item;
+			struct sort_node * pn = (struct sort_node *)parent->object.util;
+			
+			if (pn)
+				pn->indegree++;
+			parents=parents->next;
+		}
+		next=next->next;
+	}
+	/* find the roots */
+	next=*list;
+	while (next) {
+		struct sort_node * node = (struct sort_node *)next->item->object.util;
+
+		if (node->indegree == 0) {
+			commit_list_insert(next->item, &work);
+		}
+		next=next->next;
+	}
+	/* process the list in topological order */
+	while (work) {
+		struct commit * work_item = pop_commit(&work);
+		struct sort_node * work_node = (struct sort_node *)work_item->object.util;
+		struct commit_list * parents = work_item->parents;
+
+		while (parents) {
+			struct commit * parent=parents->item;
+			struct sort_node * pn = (struct sort_node *)parent->object.util;
+			
+			if (pn) {
+				pn->indegree--;
+				if (!pn->indegree) 
+					commit_list_insert(parent, &work);
+			}
+			parents=parents->next;
+		}
+		*pptr = work_node->list_item;
+		work_node->list_item->next = NULL;
+		pptr = &(*pptr)->next;
+		work_item->object.util = NULL;
+	}
+	free(nodes);
+}
diff --git a/commit.h b/commit.h
--- a/commit.h
+++ b/commit.h
@@ -55,4 +55,9 @@ struct commit *pop_most_recent_commit(st
 struct commit *pop_commit(struct commit_list **stack);
 
 int count_parents(struct commit * commit);
+
+/*
+ * Performs an in-place topological sort of list supplied.
+ */
+void sort_in_topological_order(struct commit_list ** list);
 #endif /* COMMIT_H */
------------

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/1] Add a topological sort procedure to commit.c
  2005-06-30  5:58 [PATCH 1/1] Add a topological sort procedure to commit.c Jon Seymour
@ 2005-06-30  6:52 ` Junio C Hamano
  2005-06-30  7:00   ` Jon Seymour
  0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2005-06-30  6:52 UTC (permalink / raw)
  To: Jon Seymour; +Cc: Linus Torvalds, git

Interesting idea.  Help me understand the code.

@@ -346,3 +352,79 @@ int count_parents(struct commit * commit
         return count;
 }
 
+/*
+ * Performs an in-place topological sort on the list supplied
+ */
+void sort_in_topological_order(struct commit_list ** list)
+{
+	...
+	/* allocate an array to help sort the list */
+	nodes = xmalloc(sizeof(*nodes) * count);
+	/* link the list to the array */
+	next_nodes = nodes;
+	next=*list;
+	while (next) {
+		next_nodes->list_item = next;
+		next->item->object.util = next_nodes;
+		next_nodes++;
+		next = next->next;
+	}
+	/* update the indegree */

Don't you want to initialize before update?  Either in the above
while(next) loop or just after xmalloc() with a single
memset(0), or xcalloc()?

+	next=*list;
+	while (next) {
+		struct commit_list * parents = next->item->parents;
+		while (parents) {
+			struct commit * parent=parents->item;
+			struct sort_node * pn = (struct sort_node *)parent->object.util;
+			
+			if (pn)
+				pn->indegree++;

I take this to mean that not all commits are on *list and such
commits not on *list have object.util set to NULL.  Who
initializes object.util (this is not a nitpick but a question as
a user)?   commit.c::lookup_commit() uses memset(0) and when
sort_in_topological_order() function is called everybody (not
limited to the ones on *list but all commits reachable from
them) are supposed to have object.util set to NULL?

So sort_node->indegree means how many children of it are on the
*list.  Am I reading you correctly so far?

+			parents=parents->next;
+		}
+		next=next->next;
+	}
+	/* find the roots */
+	next=*list;
+	while (next) {
+		struct sort_node * node = (struct sort_node *)next->item->object.util;
+
+		if (node->indegree == 0) {
+			commit_list_insert(next->item, &work);
+		}
+		next=next->next;
+	}

You say "find the roots", but this sounds more like finding the
tips of forests.  You are finding people without children,
right (again, not a nitpick but trying to understand the code)?

+	/* process the list in topological order */
+	while (work) {
+		struct commit * work_item = pop_commit(&work);
+		struct sort_node * work_node = (struct sort_node *)work_item->object.util;
+		struct commit_list * parents = work_item->parents;
+
+		while (parents) {
+			struct commit * parent=parents->item;
+			struct sort_node * pn = (struct sort_node *)parent->object.util;
+			
+			if (pn) {
+				pn->indegree--;
+				if (!pn->indegree) 
+					commit_list_insert(parent, &work);

And when you look at each parent, and push the parent into work
queue when you have seen all its children.

+			}
+			parents=parents->next;
+		}

And at this point work_item, popped from your work queue, is
guaranteed to be a commit all of whose children have been
processed (i.e. pushed into the original *list).

+		*pptr = work_node->list_item;
+		work_node->list_item->next = NULL;
+		pptr = &(*pptr)->next;
+		work_item->object.util = NULL;

+	}
+	free(nodes);
+}

By the way, you seem to be using "git format-patch".  Do you
want to help me pushing it upstream ;-)?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/1] Add a topological sort procedure to commit.c
  2005-06-30  6:52 ` Junio C Hamano
@ 2005-06-30  7:00   ` Jon Seymour
  2005-06-30  7:13     ` Junio C Hamano
  0 siblings, 1 reply; 5+ messages in thread
From: Jon Seymour @ 2005-06-30  7:00 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Linus Torvalds, git

On 6/30/05, Junio C Hamano <junkio@cox.net> wrote:
> Interesting idea.  Help me understand the code.
> 
> @@ -346,3 +352,79 @@ int count_parents(struct commit * commit
>          return count;
>  }
> 
> +/*
> + * Performs an in-place topological sort on the list supplied
> + */
> +void sort_in_topological_order(struct commit_list ** list)
> +{
> +       ...
> +       /* allocate an array to help sort the list */
> +       nodes = xmalloc(sizeof(*nodes) * count);
> +       /* link the list to the array */
> +       next_nodes = nodes;
> +       next=*list;
> +       while (next) {
> +               next_nodes->list_item = next;
> +               next->item->object.util = next_nodes;
> +               next_nodes++;
> +               next = next->next;
> +       }
> +       /* update the indegree */
> 
> Don't you want to initialize before update?  Either in the above
> while(next) loop or just after xmalloc() with a single
> memset(0), or xcalloc()?

Oops, yes I do.

> 
> +       next=*list;
> +       while (next) {
> +               struct commit_list * parents = next->item->parents;
> +               while (parents) {
> +                       struct commit * parent=parents->item;
> +                       struct sort_node * pn = (struct sort_node *)parent->object.util;
> +
> +                       if (pn)
> +                               pn->indegree++;
> 
> I take this to mean that not all commits are on *list and such
> commits not on *list have object.util set to NULL.  Who
> initializes object.util (this is not a nitpick but a question as
> a user)?   commit.c::lookup_commit() uses memset(0) and when
> sort_in_topological_order() function is called everybody (not
> limited to the ones on *list but all commits reachable from
> them) are supposed to have object.util set to NULL?
> 

Yes, that is the current assumption. Alternatively, I could save the current
version of object.util in the temporary structure and restore it when done.
Perhaps I'll do that.

> So sort_node->indegree means how many children of it are on the
> *list.  Am I reading you correctly so far?

Correct.

> 
> +                       parents=parents->next;
> +               }
> +               next=next->next;
> +       }
> +       /* find the roots */
> +       next=*list;
> +       while (next) {
> +               struct sort_node * node = (struct sort_node *)next->item->object.util;
> +
> +               if (node->indegree == 0) {
> +                       commit_list_insert(next->item, &work);
> +               }
> +               next=next->next;
> +       }
> 
> You say "find the roots", but this sounds more like finding the
> tips of forests.  You are finding people without children,
> right (again, not a nitpick but trying to understand the code)?

True. Should reword the comment to find the tips.

> 
> +       /* process the list in topological order */
> +       while (work) {
> +               struct commit * work_item = pop_commit(&work);
> +               struct sort_node * work_node = (struct sort_node *)work_item->object.util;
> +               struct commit_list * parents = work_item->parents;
> +
> +               while (parents) {
> +                       struct commit * parent=parents->item;
> +                       struct sort_node * pn = (struct sort_node *)parent->object.util;
> +
> +                       if (pn) {
> +                               pn->indegree--;
> +                               if (!pn->indegree)
> +                                       commit_list_insert(parent, &work);
> 
> And when you look at each parent, and push the parent into work
> queue when you have seen all its children.

Correct.

> 
> +                       }
> +                       parents=parents->next;
> +               }
> 
> And at this point work_item, popped from your work queue, is
> guaranteed to be a commit all of whose children have been
> processed (i.e. pushed into the original *list).

Yep.

> 
> +               *pptr = work_node->list_item;
> +               work_node->list_item->next = NULL;
> +               pptr = &(*pptr)->next;
> +               work_item->object.util = NULL;
> 
> +       }
> +       free(nodes);
> +}
> 
> By the way, you seem to be using "git format-patch".  Do you
> want to help me pushing it upstream ;-)?

Yep, but can you fix the PATCH 1/1 thing first :-)

I'll rework the patch, incorporating answers to your questions into the comment.

Regards,

jon.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/1] Add a topological sort procedure to commit.c
  2005-06-30  7:00   ` Jon Seymour
@ 2005-06-30  7:13     ` Junio C Hamano
  2005-06-30  7:36       ` [PATCH] git-format-patch: Prepare patches for e-mail submission Junio C Hamano
  0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2005-06-30  7:13 UTC (permalink / raw)
  To: jon; +Cc: Linus Torvalds, git

>>>>> "JS" == Jon Seymour <jon.seymour@gmail.com> writes:

>> By the way, you seem to be using "git format-patch".  Do you
>> want to help me pushing it upstream ;-)?

JS> Yep, but can you fix the PATCH 1/1 thing first :-)

Fix how?  Only special case 1/1?

That is easy to do, so I would do it, but on the other hand I
always end up editing the line anyway (my [PATCH 2/3] is often
originally [PATCH 4/7] because I have more than one series since
forked from upstream, and I would need to move it to Subject:
line), so it would not help very much, at least in my case.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] git-format-patch: Prepare patches for e-mail submission.
  2005-06-30  7:13     ` Junio C Hamano
@ 2005-06-30  7:36       ` Junio C Hamano
  0 siblings, 0 replies; 5+ messages in thread
From: Junio C Hamano @ 2005-06-30  7:36 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: jon, git

This is the script I use to prepare patches for e-mail submission.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

*** Jon, I made it by default not to say [PATCH N/M] for any M,
*** and you can turn it on with --numbered flag.   Even with
*** --numbered, " N/M" is not shown if M==1.

 Makefile                |    3 +
 git-format-patch-script |  122 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 124 insertions(+), 1 deletions(-)
 create mode 100755 git-format-patch-script

16daa73282d5aa1cc7d227945aea193553fdfaad
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,8 @@ SCRIPTS=git git-apply-patch-script git-m
 	git-fetch-script git-status-script git-commit-script \
 	git-log-script git-shortlog git-cvsimport-script git-diff-script \
 	git-reset-script git-add-script git-checkout-script git-clone-script \
-	gitk git-cherry git-rebase-script git-relink-script
+	gitk git-cherry git-rebase-script git-relink-script \
+	git-format-patch-script
 
 PROG=   git-update-cache git-diff-files git-init-db git-write-tree \
 	git-read-tree git-commit-tree git-cat-file git-fsck-cache \
diff --git a/git-format-patch-script b/git-format-patch-script
new file mode 100755
--- /dev/null
+++ b/git-format-patch-script
@@ -0,0 +1,122 @@
+#!/bin/sh
+#
+# Copyright (c) 2005 Junio C Hamano
+#
+
+usage () {
+    echo >&2 "usage: $0"' [-n] [-o dir] [-<diff options>...] upstream [ our-head ]
+
+Prepare each commit with its patch since our-head forked from upstream,
+one file per patch, for e-mail submission.  Each output file is
+numbered sequentially from 1, and uses the first line of the commit
+message (massaged for pathname safety) as the filename.
+
+When -o is specified, output files are created in that directory; otherwise in
+the current working directory.
+
+When -n is specified, instead of "[PATCH] Subject", the first line is formatted
+as "[PATCH N/M] Subject", unless you have only one patch.
+'
+    exit 1
+}
+
+diff_opts=
+IFS='
+'
+LF='
+'
+outdir=./
+
+while case "$#" in 0) break;; esac
+do
+    case "$1" in
+    -n|--n|--nu|--num|--numb|--numbe|--number|--numbere|--numbered)
+    numbered=t ;;
+    -o=*|--o=*|--ou=*|--out=*|--outp=*|--outpu=*|--output=*|--output-=*|\
+    --output-d=*|--output-di=*|--output-dir=*|--output-dire=*|\
+    --output-direc=*|--output-direct=*|--output-directo=*|\
+    --output-director=*|--output-directory=*)
+    outdir=`expr "$1" : '-[^=]*=\(.*\)'` ;;
+    -o|--o|--ou|--out|--outp|--outpu|--output|--output-|--output-d|\
+    --output-di|--output-dir|--output-dire|--output-direc|--output-direct|\
+    --output-directo|--output-director|--output-directory)
+    case "$#" in 1) usage ;; esac; shift
+    outdir="$1" ;;
+    -*)	diff_opts="$diff_opts$LF$1" ;;
+    *) break ;;
+    esac
+    shift
+done
+
+case "$#" in
+2)    linus="$1" junio="$2" ;;
+1)    linus="$1" junio=HEAD ;;
+*)    usage ;;
+esac
+
+case "$outdir" in
+*/) ;;
+*) outdir="$outdir/" ;;
+esac
+test -d "$outdir" || mkdir -p "$outdir" || exit
+
+tmp=.tmp-series$$
+trap 'rm -f $tmp-*' 0 1 2 3 15
+
+series=$tmp-series
+
+titleScript='
+	1,/^$/d
+	: loop
+	/^$/b loop
+	s/[^-a-z.A-Z_0-9]/-/g
+        s/\.\.\.*/\./g
+	s/\.*$//
+	s/--*/-/g
+	s/^-//
+	s/-$//
+	s/$/./
+	q
+'
+
+_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
+_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
+stripCommitHead='/^'"$_x40"' (from '"$_x40"')$/d'
+
+git-rev-list "$junio" "^$linus" >$series
+total=`wc -l <$series`
+i=$total
+while read commit
+do
+    title=`git-cat-file commit "$commit" | sed -e "$titleScript"`
+    case "$numbered" in
+    '') num= ;;
+    *)
+	case $total in
+	1) num= ;;
+	*) num=' '`printf "%d/%d" $i $total` ;;
+	esac
+    esac
+    file=`printf '%04d-%stxt' $i "$title"`
+    i=`expr "$i" - 1`
+    echo "$file"
+    {
+	mailScript='
+	1,/^$/d
+	: loop
+	/^$/b loop
+	s|^|[PATCH'"$num"'] |
+	: body
+	p
+	n
+	b body'
+
+	git-cat-file commit "$commit" | sed -ne "$mailScript"
+	echo '---'
+	echo
+	git-diff-tree -p $diff_opts "$commit" | git-apply --stat --summary
+	echo
+	git-diff-tree -p $diff_opts "$commit" | sed -e "$stripCommitHead"
+	echo '------------'
+    } >"$outdir$file"
+done <$series
------------

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2005-06-30  7:29 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-06-30  5:58 [PATCH 1/1] Add a topological sort procedure to commit.c Jon Seymour
2005-06-30  6:52 ` Junio C Hamano
2005-06-30  7:00   ` Jon Seymour
2005-06-30  7:13     ` Junio C Hamano
2005-06-30  7:36       ` [PATCH] git-format-patch: Prepare patches for e-mail submission Junio C Hamano

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).