CFT: merge-recursive in C (updated)

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* CFT: merge-recursive in C (updated)
@ 2006-06-27 15:48 Alex Riesen
  2006-06-27 16:42 ` Johannes Schindelin
  2006-06-27 18:22 ` CFT: merge-recursive in C (updated) Linus Torvalds
  0 siblings, 2 replies; 32+ messages in thread
From: Alex Riesen @ 2006-06-27 15:48 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Fredrik Kuivinen, Johannes Schindelin,
	Linus Torvalds

[-- Attachment #1: Type: text/plain, Size: 821 bytes --]

Following many useful suggestions I updated the patch a little:

 - use run_command
   (and hope that whatever libc on windows does the quoting correctly)
 - use diffcore api instead of running git-diff-tree
 - use a pipe to "git-update-index --index-info" instead of using command line

Thanks Junio for the careful explanations, Johannes for his example and Linus
for inspiration :)

Good news is that it is faster: 6min vs 10min. Bad news is that it is still
not enough for me and it is only on Linux (Windows will be slower,
still testing),
uses an awful lot of memory and CPU.

---

Path list optimization should be next (and I'll be glad if someone does this
before me). Also graph algos have a greate optimization potential
(intersection, all parents of a node, node_by_sha).

Sorry for attachment, still at work.

[-- Attachment #2: merge-rec.patch --]
[-- Type: text/x-patch, Size: 57239 bytes --]

diff --git a/Makefile b/Makefile
index cde619c..660f09b 100644
--- a/Makefile
+++ b/Makefile
@@ -163,7 +163,8 @@ PROGRAMS = \
 	git-upload-pack$X git-verify-pack$X \
 	git-symbolic-ref$X \
 	git-name-rev$X git-pack-redundant$X git-repo-config$X git-var$X \
-	git-describe$X git-merge-tree$X git-blame$X git-imap-send$X
+	git-describe$X git-merge-tree$X git-blame$X git-imap-send$X \
+	git-merge-recur$X
 
 BUILT_INS = git-log$X git-whatchanged$X git-show$X git-update-ref$X \
 	git-count-objects$X git-diff$X git-push$X git-mailsplit$X \
@@ -595,6 +596,10 @@ git-http-push$X: revision.o http.o http-
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
 
+git-merge-recur$X: merge-recursive.o graph.o path-list.o $(LIB_FILE)
+	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
+		$(LIBS)
+
 $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
 $(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
 $(DIFF_OBJS): diffcore.h
diff --git a/git-merge.sh b/git-merge.sh
index da5657e..04fac15 100755
--- a/git-merge.sh
+++ b/git-merge.sh
@@ -10,7 +10,7 @@ USAGE='[-n] [--no-commit] [-s <strategy>
 LF='
 '
 
-all_strategies='recursive octopus resolve stupid ours'
+all_strategies='recur recursive octopus resolve stupid ours'
 default_twohead_strategies='recursive'
 default_octopus_strategies='octopus'
 no_trivial_merge_strategies='ours'
@@ -18,7 +18,7 @@ use_strategies=
 
 index_merge=t
 if test "@@NO_PYTHON@@"; then
-	all_strategies='resolve octopus stupid ours'
+	all_strategies='recur resolve octopus stupid ours'
 	default_twohead_strategies='resolve'
 fi
 
diff --git a/graph.c b/graph.c
new file mode 100644
index 0000000..7458930
--- /dev/null
+++ b/graph.c
@@ -0,0 +1,346 @@
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "cache.h"
+#include "commit.h"
+#include "graph.h"
+
+// does not belong here
+struct tree *git_write_tree()
+{
+#if 0
+	fprintf(stderr, "GIT_INDEX_FILE='%s' git-write-tree\n",
+		getenv("GIT_INDEX_FILE"));
+#endif
+	FILE *fp = popen("git-write-tree 2>/dev/null", "r");
+	char buf[41];
+	unsigned char sha1[20];
+	int ch;
+	unsigned i = 0;
+	while ( (ch = fgetc(fp)) != EOF )
+		if ( i < sizeof(buf)-1 && ch >= '0' && ch <= 'f' )
+			buf[i++] = ch;
+		else
+			break;
+	int rc = pclose(fp);
+	if ( rc == -1 || WEXITSTATUS(rc) )
+		return NULL;
+	buf[i] = '\0';
+	if ( get_sha1(buf, sha1) != 0 )
+		return NULL;
+	return lookup_tree(sha1);
+}
+
+const char *node_title(struct node *node, int *len)
+{
+	const char *s = "(null commit)";
+	*len = strlen(s);
+
+	if ( node->virtual ) {
+		s = node->comment;
+		*len = strlen(s);
+	} else if ( node->commit ) {
+		if ( parse_commit(node->commit) != 0 ) {
+			s = "(bad commit)";
+			*len = strlen(s);
+		} else {
+			s = node->commit->buffer;
+			char prev = '\0';
+			while ( *s ) {
+				if ( '\n' == prev && '\n' == *s ) {
+					++s;
+					break;
+				}
+				prev = *s++;
+			}
+			*len = 0;
+			while ( s[*len] && '\n' != s[*len] )
+				++(*len);
+		}
+	}
+	return s;
+}
+
+const unsigned char *node_sha(const struct node *node)
+{
+	return node->commit->object.sha1;
+}
+
+const char *node_hex_sha1(const struct node *node)
+{
+	return node->commit ?  sha1_to_hex(node->commit->object.sha1):
+		node->virtual ? "virtual": "undefined";
+}
+
+static int sha_eq(const unsigned char *a, const unsigned char *b)
+{
+	if ( !a && !b )
+		return 2;
+	return a && b && memcmp(a, b, 20) == 0;
+}
+
+static struct node_list *node_list_pool = NULL;
+static unsigned node_list_pool_size = 0;
+
+unsigned node_list_count(const struct node_list *l)
+{
+	unsigned c = 0;
+	while ( l ) {
+		++c;
+		l = l->next;
+	}
+	return c;
+}
+
+struct node_list *node_list_insert(struct node *node, struct node_list **list)
+{
+	struct node_list *e;
+	if ( node_list_pool )
+		e = node_list_shift(&node_list_pool);
+	else
+		e = malloc(sizeof(struct node_list));
+	e->next = *list;
+	e->node = node;
+	*list = e;
+	return e;
+}
+
+struct node_list *node_list_free1(struct node_list *list, int free_nodes)
+{
+	struct node_list *next = list->next;
+	if ( free_nodes )
+		free(list->node);
+
+	if ( node_list_pool_size < 1000 ) {
+		list->node = NULL;
+		list->next = node_list_pool;
+		node_list_pool = list;
+	} else
+		free(list);
+	return next;
+}
+
+void node_list_free(struct node_list **list, int free_nodes)
+{
+	while ( *list )
+		node_list_free1(node_list_shift(list), free_nodes);
+}
+
+struct node *node_alloc(struct commit *commit)
+{
+	struct node *node = malloc(sizeof(struct node));
+	node->parents = NULL;
+	node->parents_count = 0;
+	node->children = NULL;
+	node->virtual = 0;
+	node->commit = commit;
+	if ( parse_commit(commit) == 0 )
+		node->tree = commit->tree;
+	else
+		die("failed to parse commit %s", sha1_to_hex(commit->object.sha1));
+
+	return node;
+}
+
+struct node *node_alloc_virtual(struct tree *tree, const char *comment)
+{
+	struct node *node = malloc(sizeof(struct node));
+	node->parents = NULL;
+	node->children = NULL;
+	node->commit = NULL;
+	node->tree = tree;
+	node->virtual = 1;
+	static unsigned virtual_id = 0;
+	node->virtual_id = virtual_id++;
+	node->comment = comment;
+	return node;
+}
+
+void node_set_parents(struct node *node, struct node_list *parents)
+{
+	struct node_list *p;
+	node->parents_count = 0;
+	for_each_node_list(p, node->parents = parents) {
+		node_list_insert(node, &p->node->children);
+		node->parents_count++;
+	}
+}
+
+static inline int node_eq(const struct node *a, const struct node *b)
+{
+	if ( a == b )
+		return 1;
+	if ( a->virtual != b->virtual )
+		return 0;
+	if ( a->virtual )
+		return a->virtual_id == b->virtual_id;
+	if ( a->commit == b->commit )
+		return 1;
+	return sha_eq(a->commit->object.sha1, b->commit->object.sha1);
+}
+
+struct node_list *node_list_find_node(const struct node *node,
+				      const struct node_list *list)
+{
+	const struct node_list *p;
+	for_each_node_list(p, list)
+		if ( node_eq(p->node, node) )
+			break;
+	return (struct node_list*)p;
+}
+
+static struct node_list *node_all_parents(struct node *node)
+{
+	struct node_list *parents = NULL, **set = &parents;
+	struct node_list *stack = NULL;
+	node_list_insert(node, &stack);
+	while ( stack ) {
+		struct node_list *el = node_list_shift(&stack);
+
+		node_list_insert(el->node, set);
+		set = &(*set)->next;
+
+		struct node_list *p = el->node->parents;
+		while ( p ) {
+			if ( !node_list_find_node(p->node, parents) )
+				node_list_insert(p->node, &stack);
+			p = p->next;
+		}
+		node_list_free1(el, 0);
+	}
+	return parents;
+}
+
+// a & b. a and are invalid after the call,
+// the result will contain all the common nodes
+struct node_list *node_list_intersect(struct node_list *a,
+				      struct node_list *b)
+{
+	struct node_list *result = NULL;
+	struct node_list *p = a;
+	while ( p ) {
+		struct node_list *next = p->next;
+		struct node_list *bn = node_list_find_node(p->node, b);
+		if ( bn ) {
+			p->next = result;
+			result = p;
+		} else
+			node_list_free1(p, 0);
+		p = next;
+	}
+	node_list_free(&b, 0);
+	return result;
+}
+
+struct node *graph_add_node(struct graph *graph, struct node *node)
+{
+	struct node_list **bucket;
+	if ( node->virtual )
+		// virtual nodes hashed by lowest byte of virtual_id
+		bucket = graph->commits + (node->virtual_id & 0xff);
+	else
+		bucket = graph->commits + node->commit->object.sha1[0];
+	node_list_insert(node, bucket);
+	return node;
+}
+
+struct node *graph_node_bysha(const struct graph *graph,
+			      const unsigned char *sha)
+{
+	const struct node_list *head = *(graph->commits + sha[0]);
+	while ( head ) {
+		if ( !head->node->virtual &&
+		     sha_eq(head->node->commit->object.sha1, sha) ) {
+			return head->node;
+		}
+		head = head->next;
+	}
+	return NULL;
+}
+
+// setup an empty tree in repository and returns it
+static struct tree *get_empty_tree()
+{
+	static struct tree *tree = NULL;
+	if ( !tree ) {
+		char *tmpindex = git_path("/merge-tmp-index");
+		setenv(INDEX_ENVIRONMENT, tmpindex, 1);
+		unlink(tmpindex);
+		tree = git_write_tree();
+		unlink(tmpindex);
+		if ( !tree )
+			die("failed to setup an empty tree");
+	}
+	return tree;
+}
+
+static struct node *addCommonRoot(struct graph *graph)
+{
+	struct node_list *roots = NULL;
+	struct node_list *p;
+	unsigned b;
+	for (b = 0; b < sizeof(graph->commits)/sizeof(graph->commits[0]); ++b) {
+		for_each_node_list(p, graph->commits[b])
+			if ( p->node->parents_count )
+				node_list_insert(p->node, &roots);
+	}
+	struct node *super = node_alloc_virtual(get_empty_tree(), "Root");
+	for_each_node_list(p, roots) {
+		struct node_list *list = NULL;
+		node_list_insert(super, &list);
+		node_set_parents(p->node, list);
+	}
+	graph_add_node(graph, super);
+	return super;
+}
+
+// getCommonAncestors: Find the common ancestors for commit1 and commit2
+struct node_list *graph_common_ancestors(struct graph *graph,
+					 struct node *commit1,
+					 struct node *commit2)
+{
+	struct node_list *shared;
+	shared = node_list_intersect(node_all_parents(commit1),
+				     node_all_parents(commit2));
+	if ( !shared )
+		node_list_insert(addCommonRoot(graph), &shared);
+
+	/*
+	   for s in shared:
+	   if len([c for c in s.children if c in shared]) == 0:
+	   res.add(s)
+	   return list(res)
+	   */
+	struct node_list *ca = NULL, **pca = &ca;
+	struct node_list *s;
+	for_each_node_list(s, shared) {
+		unsigned n = 0;
+		struct node_list *c;
+		for_each_node_list(c, s->node->children)
+			if ( node_list_find_node(c->node, shared) ) {
+				++n;
+				break;
+			}
+		if ( n == 0 ) {
+			node_list_insert(s->node, pca);
+			pca = &(*pca)->next;
+		}
+	}
+	node_list_free(&shared, 0);
+	return ca;
+}
+
+#if 0
+void node_list_print(const char *msg, const struct node_list *list)
+{
+    const struct node_list *p;
+    printf("%s\n", msg);
+    for_each_node_list(p, list) {
+	int len;
+	const char *msg = node_title(p->node, &len);
+	printf("\t%s %.*s\n", node_hex_sha1(p->node), len, msg);
+    }
+}
+#endif
+
+// vim: sw=8 noet
diff --git a/graph.h b/graph.h
new file mode 100644
index 0000000..2cdbd96
--- /dev/null
+++ b/graph.h
@@ -0,0 +1,85 @@
+#ifndef _GRAPH_H_
+#define _GRAPH_H_
+
+struct node;
+struct graph;
+struct commit;
+struct tree;
+struct commit_list;
+
+struct node_list
+{
+	struct node_list *next;
+	struct node *node;
+};
+
+struct node
+{
+	struct commit *commit;
+	struct tree *tree;
+
+	unsigned parents_count;
+	struct node_list *parents;
+	struct node_list *children;
+
+	unsigned virtual:1;
+	unsigned virtual_id;
+	const char *comment;
+};
+
+struct node_list *node_list_insert(struct node *node, struct node_list **list);
+void node_list_free(struct node_list **list, int free_nodes);
+struct node_list *node_list_free1(struct node_list *list, int free_node);
+
+static inline struct node_list *node_list_shift(struct node_list **list)
+{
+	struct node_list *head = *list;
+	*list = head->next;
+	return head;
+}
+
+static inline struct node *node_list_shift_node(struct node_list **list)
+{
+	struct node *node = (*list)->node;
+	*list = node_list_free1(*list, 0);
+	return node;
+}
+
+struct node *node_alloc(struct commit *);
+struct node *node_alloc_virtual(struct tree *, const char *comment);
+
+void node_set_parents(struct node *node, struct node_list *parents);
+unsigned node_list_count(const struct node_list *);
+
+struct node_list *node_list_find_node(const struct node *node,
+				      const struct node_list *list);
+
+
+// find the common ancestors for commit1 and commit2
+struct node_list *graph_common_ancestors(struct graph *graph,
+					 struct node *commit1,
+					 struct node *commit2);
+
+struct graph
+{
+	// hashed by first byte of SHA-1 or low byte of virtual_id
+	struct node_list *commits[256];
+};
+
+struct node *graph_add_node(struct graph *graph, struct node *node);
+struct node *graph_node_bysha(const struct graph *graph,
+			      const unsigned char *sha);
+
+#define for_each_node_list(p,list) \
+	for ( p = (list); p; p = p->next )
+
+const char *node_title(struct node *, int *len);
+const unsigned char *node_sha(const struct node *);
+const char *node_hex_sha1(const struct node *);
+void node_list_print(const char *msg, const struct node_list *list);
+
+struct tree *git_write_tree();
+
+#endif /* _GRAPH_H_ */
+
+// vim: sw=8 noet
diff --git a/merge-recursive.c b/merge-recursive.c
new file mode 100644
index 0000000..547150e
--- /dev/null
+++ b/merge-recursive.c
@@ -0,0 +1,1563 @@
+//
+// Recursive Merge algorithm stolen from git-merge-recursive.py by
+// Fredrik Kuivinen.
+//
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "cache.h"
+#include "commit.h"
+#include "blob.h"
+#include "tree-walk.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "run-command.h"
+
+#include "graph.h"
+#include "path-list.h"
+
+#define for_each_commit(p,list) for ( p = (list); p; p = p->next )
+
+struct merge_result
+{
+	struct node *commit;
+	unsigned clean:1;
+};
+
+struct merge_tree_result
+{
+	struct tree *tree;
+	unsigned clean:1;
+};
+
+struct index_entry
+{
+	struct index_entry *next;
+	struct
+	{
+		unsigned mode;
+		unsigned char sha[20];
+	} stages[4];
+	unsigned processed:1;
+	char path[1];
+};
+
+struct index_entry *index_entry_alloc(const char *path)
+{
+	size_t n = strlen(path); // index_entry::path has room for \0
+	struct index_entry *p = xmalloc(sizeof(struct index_entry) + n);
+	if ( !p )
+		return NULL;
+	memcpy(p->path, path, n + 1);
+	p->next = NULL;
+	p->processed = 0;
+	return p;
+}
+
+#if 0
+static
+void print_index_entry(const char *text, const struct index_entry *e)
+{
+    printf("%s%s next: %p %s\n", text,
+	   e ? e->path: NULL,
+	   e ? e->next: NULL,
+	   e && e->processed ? "processed": "");
+    if ( e ) {
+	int i;
+	for ( i = 1; i < 4; ++i )
+	    printf("\tstage[%d]: %06o %s\n", i,
+		   e->stages[i].mode,
+		   sha1_to_hex(e->stages[i].sha));
+    }
+}
+#endif
+
+static struct path_list *currentFileSet;
+static struct path_list *currentDirectorySet;
+
+static int output_indent = 0;
+
+static void output(const char *fmt, ...)
+{
+	va_list args;
+	int i;
+	for ( i = output_indent; i--; )
+		fputs("  ", stdout);
+	va_start(args, fmt);
+	vfprintf(stdout, fmt, args);
+	va_end(args);
+	fputc('\n', stdout);
+}
+
+static const char *original_index_file;
+static const char *temporary_index_file;
+
+static void setup_index(int temp)
+{
+	const char *idx = temp ? temporary_index_file: original_index_file;
+	unlink(temporary_index_file);
+	setenv("GIT_INDEX_FILE", idx, 1);
+}
+
+// This is a global variable which is used in a number of places but
+// only written to in the 'merge' function.
+
+// index_only == 1    => Don't leave any non-stage 0 entries in the cache and
+//                       don't update the working directory.
+//               0    => Leave unmerged entries in the cache and update
+//                       the working directory.
+static int index_only = 0; // cacheOnly
+
+static int git_read_tree(const struct tree *tree)
+{
+#if 0
+	fprintf(stderr, "GIT_INDEX_FILE='%s' git-read-tree %s\n",
+		getenv("GIT_INDEX_FILE"),
+		sha1_to_hex(tree->object.sha1));
+#endif
+	const char *argv[] = { "git-read-tree", NULL, NULL, };
+	argv[1] = sha1_to_hex(tree->object.sha1);
+	int rc = run_command_v(2, argv);
+	return rc < 0 ? -1: rc;
+}
+
+static int git_merge_trees(const char *update_arg,
+			   struct tree *common,
+			   struct tree *head,
+			   struct tree *merge)
+{
+#if 0
+	fprintf(stderr, "GIT_INDEX_FILE='%s' git-read-tree %s -m %s %s %s\n",
+		getenv("GIT_INDEX_FILE"),
+		update_arg,
+		sha1_to_hex(common->object.sha1),
+		sha1_to_hex(head->object.sha1),
+		sha1_to_hex(merge->object.sha1));
+#endif
+	const char *argv[] = {
+		"git-read-tree", NULL, "-m", NULL, NULL, NULL,
+		NULL,
+	};
+	argv[1] = update_arg;
+	argv[3] = sha1_to_hex(common->object.sha1);
+	argv[4] = sha1_to_hex(head->object.sha1);
+	argv[5] = sha1_to_hex(merge->object.sha1);
+	int rc = run_command_v(6, argv);
+	return rc < 0 ? -1: rc;
+}
+
+struct merge_tree_result merge_trees(struct tree *head,
+				     struct tree *merge,
+				     struct tree *common,
+				     const char *branch1Name,
+				     const char *branch2Name);
+
+
+// The entry point to the merge code
+
+// Merge the commits h1 and h2, return the resulting virtual
+// commit object and a flag indicating the cleaness of the merge.
+static
+struct merge_result merge(struct node *h1,
+			  struct node *h2,
+			  const char *branch1Name,
+			  const char *branch2Name,
+			  struct graph *graph,
+			  int callDepth /* =0 */,
+			  struct node *ancestor /* =None */)
+{
+	struct merge_result result = { NULL, 0 };
+
+	const char *msg;
+	int msglen;
+	output("Merging:");
+	msg = node_title(h1, &msglen);
+	output("%s %.*s", node_hex_sha1(h1), msglen, msg);
+	msg = node_title(h2, &msglen);
+	output("%s %.*s", node_hex_sha1(h2), msglen, msg);
+	if ( !ancestor && !graph )
+		die("graph is not initialized");
+	struct node_list *ca = NULL;
+	if ( ancestor )
+		node_list_insert(ancestor, &ca);
+	else
+		ca = graph_common_ancestors(graph, h1, h2);
+
+	output("found %u common ancestor(s):", node_list_count(ca));
+	struct node_list *x;
+	for_each_node_list(x,ca) {
+		msg = node_title(x->node, &msglen);
+		output("%s %.*s", node_hex_sha1(x->node), msglen, msg);
+	}
+
+	struct node *mergedCA = node_list_shift_node(&ca);
+
+	struct node_list *h;
+	for_each_commit(h,ca) {
+		output_indent = callDepth + 1;
+		result = merge(mergedCA, h->node,
+			       "Temporary merge branch 1",
+			       "Temporary merge branch 2",
+			       graph,
+			       callDepth + 1,
+			       NULL);
+		mergedCA = result.commit;
+		output_indent = callDepth;
+
+		if ( !mergedCA )
+			die("merge returned no commit");
+	}
+
+	if ( callDepth == 0 ) {
+		setup_index(0);
+		index_only = 0;
+	} else {
+		setup_index(1);
+		git_read_tree(h1->tree);
+		index_only = 1;
+	}
+
+	struct merge_tree_result mtr;
+	mtr = merge_trees(h1->tree, h2->tree,
+			  mergedCA->tree, branch1Name, branch2Name);
+
+	if ( graph && (mtr.clean || index_only) ) {
+		result.commit = node_alloc_virtual(mtr.tree, "merged tree");
+		struct node_list *parents = NULL;
+		node_list_insert(h1, &parents);
+		node_list_insert(h2, &parents->next);
+		node_set_parents(result.commit, parents);
+		graph_add_node(graph, result.commit);
+	} else
+		result.commit = NULL;
+
+	result.clean = mtr.clean;
+	return result;
+}
+
+#define READ_TREE_FOUND 2
+typedef int (*read_tree_rt_fn_t)(const char *sha1,
+				 const char *path,
+				 unsigned mode,
+				 void *data);
+
+// git-ls-tree -r -t <tree>
+static int read_tree_rt(struct tree *tree,
+			const char *base,
+			int baselen,
+			read_tree_rt_fn_t fn,
+			void *data)
+{
+	struct tree_desc desc;
+	struct name_entry entry;
+
+	if (parse_tree(tree))
+		return -1;
+
+	desc.buf = tree->buffer;
+	desc.size = tree->size;
+
+	while ( tree_entry(&desc, &entry) ) {
+		int retval;
+		char *path = xmalloc(baselen + entry.pathlen + 2);
+		memcpy(path, base, baselen);
+		memcpy(path + baselen, entry.path, entry.pathlen);
+		path[baselen + entry.pathlen] = '\0';
+
+		switch ( retval = fn(entry.sha1, path, entry.mode, data) ) {
+		case READ_TREE_RECURSIVE:
+			break;
+		case 0:
+			free(path);
+			continue;
+		default:
+			free(path);
+			return retval;
+		}
+		if (S_ISDIR(entry.mode)) {
+			path[baselen + entry.pathlen] = '/';
+			path[baselen + entry.pathlen + 1] = '\0';
+			retval = read_tree_rt(lookup_tree(entry.sha1),
+					      path,
+					      baselen + entry.pathlen + 1,
+					      fn, data);
+			path[baselen + entry.pathlen] = '\0';
+			if (retval) {
+				free(path);
+				return retval;
+			}
+		}
+		free(path);
+	}
+	return 0;
+}
+
+struct files_and_dirs
+{
+	struct path_list **files;
+	struct path_list **dirs;
+};
+
+static int save_files_dirs(const char *sha1,
+			   const char *path_,
+			   unsigned mode,
+			   void *data_)
+{
+	struct files_and_dirs *data = data_;
+	char *path = strdup(path_);
+
+	if (S_ISDIR(mode)) {
+		path_list_insert(path, data->dirs);
+		data->dirs = &(*data->dirs)->next;
+	} else {
+		path_list_insert(path, data->files);
+		data->files = &(*data->files)->next;
+	}
+	return READ_TREE_RECURSIVE;
+}
+
+int getFilesAndDirs(struct tree *tree,
+		    struct path_list **files,
+		    struct path_list **dirs)
+{
+	struct files_and_dirs data;
+	path_list_clear(files, 1);
+	path_list_clear(dirs, 1);
+	data.files = files;
+	data.dirs = dirs;
+	if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 )
+		return 0;
+	return path_list_count(*files) + path_list_count(*dirs);
+}
+
+struct index_entry *index_entry_find(struct index_entry *ents, const char *path)
+{
+	struct index_entry *e;
+	for ( e = ents; e; e = e->next )
+		if ( strcmp(e->path, path) == 0 )
+			break;
+	return e;
+}
+
+struct index_entry *index_entry_get(struct index_entry **ents, const char *path)
+{
+	struct index_entry *e, **tail = ents;
+	for ( e = *ents; e; e = e->next ) {
+		if ( strcmp(e->path, path) == 0 )
+			return e;
+		tail = &e->next;
+	}
+	e = index_entry_alloc(path);
+	memset(e->stages, 0, sizeof(e->stages));
+	return *tail = e;
+}
+
+struct find_entry
+{
+	const char *path;
+	unsigned char *sha;
+	unsigned *mode;
+};
+
+static int find_entry(const char *sha,
+		      const char *path,
+		      unsigned mode,
+		      void *data_)
+{
+	struct find_entry *data = data_;
+	if ( strcmp(path, data->path) == 0 ) {
+		memcpy(data->sha, sha, 20);
+		*data->mode = mode;
+		return READ_TREE_FOUND;
+	}
+	return READ_TREE_RECURSIVE;
+}
+
+// Returns a CacheEntry object which doesn't have to correspond to
+// a real cache entry in Git's index.
+struct index_entry *index_entry_from_db(const char *path,
+					struct tree *o,
+					struct tree *a,
+					struct tree *b)
+{
+	struct index_entry *e = index_entry_alloc(path);
+	struct find_entry data;
+	data.path = path;
+	data.sha = e->stages[1].sha;
+	data.mode = &e->stages[1].mode;
+	if ( read_tree_rt(o, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
+		// fprintf(stderr, "1: %s:%s not found\n",
+		// 	sha1_to_hex(o->object.sha1), path);
+		memcpy(e->stages[1].sha, null_sha1, 20);
+		e->stages[1].mode = 0;
+	}
+	data.sha = e->stages[2].sha;
+	data.mode = &e->stages[2].mode;
+	if ( read_tree_rt(a, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
+		// fprintf(stderr, "2: %s:%s not found\n",
+		// 	sha1_to_hex(a->object.sha1), path);
+		memcpy(e->stages[2].sha, null_sha1, 20);
+		e->stages[2].mode = 0;
+	}
+	data.sha = e->stages[3].sha;
+	data.mode = &e->stages[3].mode;
+	if ( read_tree_rt(b, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
+		// fprintf(stderr, "3: %s:%s not found\n",
+		// 	sha1_to_hex(b->object.sha1), path);
+		memcpy(e->stages[3].sha, null_sha1, 20);
+		e->stages[3].mode = 0;
+	}
+	return e;
+}
+
+void free_index_entries(struct index_entry **ents)
+{
+	while ( *ents ) {
+		struct index_entry *next = (*ents)->next;
+		free(*ents);
+		*ents = next;
+	}
+}
+
+static int fget_mode(unsigned *mode, FILE *fp, int *ch)
+{
+	int p;
+	char buf[8];
+	for ( p = 0; (*ch = fgetc(fp)) != EOF && p < 6; ) {
+		if ( *ch == '\x20' || *ch == '\t' || *ch == '\n' || *ch == '\r' )
+			break;
+		if ( *ch < '0' || *ch > '7' )
+			return -1;
+		buf[p++] = *ch;
+	}
+	buf[p] = '\0';
+	*mode = strtoul(buf, 0, 8);
+	return 0;
+}
+
+static int fget_sha1(unsigned char *sha, FILE *fp, int *ch)
+{
+	char buf[40];
+	int p;
+	for ( p = 0; (*ch = fgetc(fp)) != EOF && p < 40; ) {
+		if ( ('0' <= *ch && *ch <= '9') ||
+		     ('a' <= *ch && *ch <= 'f') ||
+		     ('A' <= *ch && *ch <= 'F') )
+			buf[p++] = *ch;
+		else
+			return -1;
+	}
+	if ( p != 40 || get_sha1_hex(buf, sha) == -1 )
+		return -1;
+	return 0;
+}
+// Create a dictionary mapping file names to CacheEntry objects. The
+// dictionary contains one entry for every path with a non-zero stage entry.
+struct index_entry *unmergedCacheEntries()
+{
+	struct index_entry *unmerged = NULL;
+	FILE *fp = popen("git-ls-files -z --unmerged", "r");
+	if ( !fp )
+		return NULL;
+	int ch;
+	while ( !feof(fp) ) {
+		unsigned mode;
+		unsigned char sha[20];
+		char stage = '0';
+		char path[PATH_MAX];
+		int p;
+		// mode
+		if ( fget_mode(&mode, fp, &ch) )
+			goto wait_eol;
+		if ( '\x20' != ch )
+			goto wait_eol;
+		// SHA1
+		if ( fget_sha1(sha, fp, &ch) )
+			goto wait_eol;
+		if ( '\x20' != ch )
+			goto wait_eol;
+		// stage
+		if ( (ch = fgetc(fp)) != EOF ) {
+			stage = ch;
+			if ( ch < '1' || ch > '3' )
+				goto wait_eol;
+		}
+		if ( (ch = fgetc(fp)) == EOF || '\t' != ch )
+			goto wait_eol;
+		// path
+		for ( p = 0; (ch = fgetc(fp)) != EOF; ++p ) {
+			path[p] = ch;
+			if ( !ch )
+				break;
+			if ( p == sizeof(path) - 1 ) {
+				path[p] = '\0';
+				error("path too long: %s", path);
+				goto wait_eol;
+			}
+		}
+		if ( ch )
+			goto wait_eol;
+		// printf("unmerged %08o %s %c %s\n",mode,sha1_to_hex(sha),stage,path);
+		struct index_entry *e = index_entry_get(&unmerged, path);
+		e->stages[stage - '1' + 1].mode = mode;
+		memcpy(e->stages[stage - '1' + 1].sha, sha, 20);
+		continue;
+	wait_eol:
+		while ( (ch = fgetc(fp)) != EOF && ch );
+	}
+	pclose(fp);
+	return unmerged;
+}
+
+struct rename_entry
+{
+	struct rename_entry *next;
+
+	char *src;
+	unsigned char src_sha[20];
+	unsigned src_mode;
+	struct index_entry *src_entry;
+
+	char *dst;
+	unsigned char dst_sha[20];
+	unsigned dst_mode;
+	struct index_entry *dst_entry;
+
+	unsigned score;
+	unsigned processed:1;
+};
+
+struct rename_entry *find_rename_bysrc(struct rename_entry *e,
+				       const char *name)
+{
+	while ( e ) {
+		if ( strcmp(e->src, name) == 0 )
+			break;
+		e = e->next;
+	}
+	return e;
+}
+
+struct rename_entry *find_rename_bydst(struct rename_entry *e,
+				       const char *name)
+{
+	while ( e ) {
+		if ( strcmp(e->dst, name) == 0 )
+			break;
+		e = e->next;
+	}
+	return e;
+}
+
+void rename_entry_free(struct rename_entry *p)
+{
+	free(p->src);
+	free(p->dst);
+	free(p);
+}
+
+void free_rename_entries(struct rename_entry **list)
+{
+	while ( *list ) {
+		struct rename_entry *next = (*list)->next;
+		rename_entry_free(*list);
+		*list = next;
+	}
+}
+
+// Get information of all renames which occured between 'oTree' and
+// 'tree'. We need the three trees in the merge ('oTree', 'aTree' and
+// 'bTree') to be able to associate the correct cache entries with
+// the rename information. 'tree' is always equal to either aTree or bTree.
+struct rename_entry *getRenames(struct tree *tree,
+				struct tree *oTree,
+				struct tree *aTree,
+				struct tree *bTree,
+				struct index_entry **entries)
+{
+	struct rename_entry *renames = NULL;
+	struct rename_entry **rptr = &renames;
+	struct diff_options opts;
+	diff_setup(&opts);
+	opts.recursive = 1;
+	opts.detect_rename = DIFF_DETECT_RENAME;
+	opts.output_format = DIFF_FORMAT_NO_OUTPUT;
+	if (diff_setup_done(&opts) < 0)
+		die("diff setup failed");
+	diff_tree_sha1(oTree->object.sha1, tree->object.sha1, "", &opts);
+	diffcore_std(&opts);
+	int i;
+	for (i = 0; i < diff_queued_diff.nr; ++i) {
+		struct rename_entry *re;
+		struct diff_filepair *pair = diff_queued_diff.queue[i];
+		if (pair->status != 'R')
+			continue;
+		re = xmalloc(sizeof(*re));
+		re->next = NULL;
+		re->processed = 0;
+		re->score = pair->score;
+		memcpy(re->src_sha, pair->one->sha1, 20);
+		re->src = strdup(pair->one->path);
+		re->src_mode = pair->one->mode;
+		memcpy(re->dst_sha, pair->two->sha1, 20);
+		re->dst = strdup(pair->two->path);
+		re->dst_mode = pair->two->mode;
+		re->src_entry = index_entry_find(*entries, re->src);
+		if ( !re->src_entry ) {
+			re->src_entry = index_entry_from_db(re->src, oTree, aTree, bTree);
+			re->src_entry->next = *entries;
+			*entries = re->src_entry;
+		}
+		re->dst_entry = index_entry_find(*entries, re->dst);
+		if ( !re->dst_entry ) {
+			re->dst_entry = index_entry_from_db(re->dst, oTree, aTree, bTree);
+			re->dst_entry->next = *entries;
+			*entries = re->dst_entry;
+		}
+		*rptr = re;
+		rptr = &re->next;
+	}
+	opts.output_format = DIFF_FORMAT_NO_OUTPUT;
+	diff_flush(&opts);
+	return renames;
+}
+
+static FILE *git_update_index_pipe()
+{
+	return popen("git-update-index -z --index-info", "w");
+}
+
+int setIndexStages(FILE *fp,
+		   const char *path,
+		   unsigned char *osha, unsigned omode,
+		   unsigned char *asha, unsigned amode,
+		   unsigned char *bsha, unsigned bmode,
+		   int clear /* =True */)
+{
+	if ( !fp )
+		return -1;
+	if ( clear ) {
+		fprintf(fp, "0 %s\t%s", sha1_to_hex(null_sha1), path);
+		fputc('\0', fp);
+	}
+	if ( omode ) {
+		fprintf(fp, "0%o %s 1\t%s", omode, sha1_to_hex(osha), path);
+		fputc('\0', fp);
+	}
+	if ( amode ) {
+		fprintf(fp, "0%o %s 2\t%s", amode, sha1_to_hex(asha), path);
+		fputc('\0', fp);
+	}
+	if ( bmode ) {
+		fprintf(fp, "0%o %s 3\t%s", bmode, sha1_to_hex(bsha), path);
+		fputc('\0', fp);
+	}
+	return 0;
+}
+
+static int remove_path(const char *name)
+{
+	int ret;
+	char *slash;
+
+	ret = unlink(name);
+	if ( ret )
+		return ret;
+	int len = strlen(name);
+	char *dirs = malloc(len+1);
+	memcpy(dirs, name, len);
+	dirs[len] = '\0';
+	while ( (slash = strrchr(name, '/')) ) {
+		*slash = '\0';
+		len = slash - name;
+		if ( rmdir(name) != 0 )
+			break;
+	}
+	free(dirs);
+	return ret;
+}
+
+int removeFile(FILE *fp, int clean, const char *path)
+{
+	int updateCache = index_only || clean;
+	int updateWd = !index_only;
+
+	if ( updateCache ) {
+		if ( !fp )
+			return -1;
+		fprintf(fp, "0 %s\t%s", sha1_to_hex(null_sha1), path);
+		fputc('\0', fp);
+		return 0;
+	}
+	if ( updateWd )
+	{
+		unlink(path);
+		if ( errno != ENOENT || errno != EISDIR )
+			return -1;
+		remove_path(path);
+	}
+	return 0;
+}
+
+char *uniquePath(const char *path, const char *branch)
+{
+	char *newpath = xmalloc(strlen(path) + 1 + strlen(branch) + 8 + 1);
+	strcpy(newpath, path);
+	strcat(newpath, "~");
+	char *p = newpath + strlen(newpath);
+	strcpy(p, branch);
+	for ( ; *p; ++p )
+		if ( '/' == *p )
+			*p = '_';
+	int suffix = 0;
+	struct stat st;
+	while ( path_list_has_path(currentFileSet, newpath) ||
+		path_list_has_path(currentDirectorySet, newpath) ||
+		lstat(newpath, &st) == 0 ) {
+		sprintf(p, "_%d", suffix++);
+	}
+	path_list_insert(newpath, &currentFileSet);
+	return newpath;
+}
+
+int mkdir_p(const char *path, unsigned long mode, int create_last)
+{
+	char *buf = strdup(path);
+	char *p;
+
+	for ( p = buf; *p; ++p ) {
+		if ( *p != '/' )
+			continue;
+		*p = '\0';
+		if (mkdir(buf, mode)) {
+			int e = errno;
+			if ( e == EEXIST ) {
+				struct stat st;
+				if ( !stat(buf, &st) && S_ISDIR(st.st_mode) )
+					goto next; /* ok */
+				errno = e;
+			}
+			free(buf);
+			return -1;
+		}
+	next:
+		*p = '/';
+	}
+	free(buf);
+	if ( create_last && mkdir(path, mode) )
+		return -1;
+	return 0;
+}
+
+/* stolen from builtin-cat-file.c */
+static void flush_buffer(int fd, const char *buf, unsigned long size)
+{
+	while (size > 0) {
+		long ret = xwrite(fd, buf, size);
+		if (ret < 0) {
+			/* Ignore epipe */
+			if (errno == EPIPE)
+				break;
+			die("git-cat-file: %s", strerror(errno));
+		} else if (!ret) {
+			die("git-cat-file: disk full?");
+		}
+		size -= ret;
+		buf += ret;
+	}
+}
+
+void updateFileExt(FILE *fp,
+		   const unsigned char *sha,
+		   unsigned mode,
+		   const char *path,
+		   int updateCache,
+		   int updateWd)
+{
+	if ( index_only )
+		updateWd = 0;
+
+	if ( updateWd ) {
+		char type[20];
+		void *buf;
+		unsigned long size;
+
+		buf = read_sha1_file(sha, type, &size);
+		if (!buf)
+			die("cannot read object %s '%s'", sha1_to_hex(sha), path);
+		if ( strcmp(type, blob_type) != 0 )
+			die("blob expected for %s '%s'", sha1_to_hex(sha), path);
+
+		if ( S_ISREG(mode) ) {
+			if ( mkdir_p(path, 0777, 0 /* don't create last element */) )
+				die("failed to create path %s: %s", path, strerror(errno));
+			unlink(path);
+			if ( mode & 0100 )
+				mode = 0777;
+			else
+				mode = 0666;
+			int fd = open(path, O_WRONLY | O_TRUNC | O_CREAT, mode);
+			if ( fd < 0 )
+				die("failed to open %s: %s", path, strerror(errno));
+			flush_buffer(fd, buf, size);
+			close(fd);
+		} else if ( S_ISLNK(mode) ) {
+			char *linkTarget = malloc(size + 1);
+			memcpy(linkTarget, buf, size);
+			linkTarget[size] = '\0';
+			mkdir_p(path, 0777, 0);
+			symlink(linkTarget, path);
+		} else
+			die("do not know what to do with %06o %s '%s'",
+			    mode, sha1_to_hex(sha), path);
+	}
+	if ( updateCache )
+	{
+		// XXX just always use "git update-index --index-info"?
+		fprintf(fp, "%06o %s\t%s", mode, sha1_to_hex(sha), path);
+		fputc('\0', fp);
+	}
+}
+
+void updateFile(FILE *fp,
+		int clean,
+		const unsigned char *sha,
+		unsigned mode,
+		const char *path)
+{
+	updateFileExt(fp, sha, mode, path, index_only || clean, !index_only);
+}
+
+// Low level file merging, update and removal
+// ------------------------------------------
+struct merge_file_info
+{
+	unsigned char sha[20];
+	unsigned mode;
+	unsigned clean:1;
+	unsigned merge:1;
+};
+
+static char *git_unpack_file(unsigned char *sha1, char *path)
+{
+	void *buf;
+	char type[20];
+	unsigned long size;
+	int fd;
+
+	buf = read_sha1_file(sha1, type, &size);
+	if (!buf || strcmp(type, blob_type))
+		die("unable to read blob object %s", sha1_to_hex(sha1));
+
+	strcpy(path, ".merge_file_XXXXXX");
+	fd = mkstemp(path);
+	if (fd < 0)
+		die("unable to create temp-file");
+	if (write(fd, buf, size) != size)
+		die("unable to write temp-file");
+	close(fd);
+	return path;
+}
+
+struct merge_file_info
+mergeFile(const char *oPath, unsigned char *oSha, unsigned oMode,
+	  const char *aPath, unsigned char *aSha, unsigned aMode,
+	  const char *bPath, unsigned char *bSha, unsigned bMode,
+	  const char *branch1Name, const char *branch2Name)
+{
+	struct merge_file_info result;
+	result.merge = 0;
+	result.clean = 1;
+
+	if ( (S_IFMT & aMode) != (S_IFMT & bMode) ) {
+		result.clean = 0;
+		if ( S_ISREG(aMode) ) {
+			result.mode = aMode;
+			memcpy(result.sha, aSha, 20);
+		} else {
+			result.mode = bMode;
+			memcpy(result.sha, bSha, 20);
+		}
+	} else {
+		if ( memcmp(aSha, oSha, 20) != 0 && memcmp(bSha, oSha, 20) != 0 )
+			result.merge = 1;
+
+		result.mode = aMode == oMode ? bMode: aMode;
+
+		if ( memcmp(aSha, oSha, 20) == 0 )
+			memcpy(result.sha, bSha, 20);
+		else if ( memcmp(bSha, oSha, 20) == 0 )
+			memcpy(result.sha, aSha, 20);
+		else if ( S_ISREG(aMode) ) {
+
+			int code = 1;
+			char orig[PATH_MAX];
+			char src1[PATH_MAX];
+			char src2[PATH_MAX];
+
+			git_unpack_file(oSha, orig);
+			git_unpack_file(aSha, src1);
+			git_unpack_file(bSha, src2);
+
+			const char *argv[] = {
+				"merge", "-L", NULL, "-L", NULL, "-L", NULL,
+				src1, orig, src2,
+				NULL
+			};
+			char *la, *lb, *lo;
+			argv[2] = la = malloc(strlen(branch1Name) + 2 + strlen(aPath));
+			strcat(strcat(strcpy(la, branch1Name), "/"), aPath);
+			argv[6] = lb = malloc(strlen(branch2Name) + 2 + strlen(bPath));
+			strcat(strcat(strcpy(lb, branch2Name), "/"), bPath);
+			argv[4] = lo = malloc(7 + strlen(oPath));
+			strcat(strcpy(lo, "orig/"), oPath);
+
+#if 0
+			printf("%s %s %s %s %s %s %s %s %s %s\n",
+			       argv[0], argv[1], argv[2], argv[3], argv[4],
+			       argv[5], argv[6], argv[7], argv[8], argv[9]);
+#endif
+			code = run_command_v(10, argv);
+
+			free(la);
+			free(lb);
+			free(lo);
+			if ( code && code < -256 ) {
+				die("Failed to execute 'merge'. merge(1) is used as the "
+				    "file-level merge tool. Is 'merge' in your path?");
+			}
+			char cmd[PATH_MAX];
+			snprintf(cmd, sizeof(cmd), "git-hash-object -t blob -w %s", src1);
+			FILE *fp = popen(cmd, "r");
+			if ( !fp )
+				die("cannot run git-hash-object: %s", strerror(errno));
+			int ch;
+			if ( fget_sha1(result.sha, fp, &ch) )
+				die("invalid output from git-hash-object");
+			pclose(fp);
+
+			unlink(orig);
+			unlink(src1);
+			unlink(src2);
+
+			result.clean = WEXITSTATUS(code) == 0;
+		} else {
+			if ( !(S_ISLNK(aMode) || S_ISLNK(bMode)) )
+				die("cannot merge modes?");
+
+			memcpy(result.sha, aSha, 20);
+
+			if ( memcmp(aSha, bSha, 20) != 0 )
+				result.clean = 0;
+		}
+	}
+
+	return result;
+}
+
+static void memswp(void *p1, void *p2, unsigned n)
+{
+	unsigned char *a = p1, *b = p2;
+	while ( n-- ) {
+		*a ^= *b;
+		*b ^= *a;
+		*a ^= *b;
+		++a;
+		++b;
+	}
+}
+
+int processRenames(struct rename_entry *renamesA,
+		   struct rename_entry *renamesB,
+		   const char *branchNameA,
+		   const char *branchNameB)
+{
+	int cleanMerge = 1;
+	//    printf("process renames %s:%s -> %s:%s\n",
+	//	   branchNameA, renamesA ? renamesA->src: "(none)",
+	//	   branchNameB, renamesB ? renamesB->dst: "(none)");
+
+	struct path_list *srcNames = NULL;
+	const struct rename_entry *sre;
+
+	for (sre = renamesA; sre; sre = sre->next)
+		if (!path_list_has_path(srcNames, sre->src))
+			path_list_insert(sre->src, &srcNames);
+	for (sre = renamesB; sre; sre = sre->next)
+		if (!path_list_has_path(srcNames, sre->src))
+			path_list_insert(sre->src, &srcNames);
+
+	FILE *fp = git_update_index_pipe();
+	struct path_list *src;
+	for_each_path(src,srcNames) {
+		struct rename_entry *renames1, *renames2, *ren1, *ren2;
+		const char *branchName1, *branchName2;
+		ren1 = find_rename_bysrc(renamesA, src->path);
+		ren2 = find_rename_bysrc(renamesB, src->path);
+		if ( ren1 ) {
+			renames1 = renamesA;
+			renames2 = renamesB;
+			branchName1 = branchNameA;
+			branchName2 = branchNameB;
+		} else {
+			renames1 = renamesB;
+			renames2 = renamesA;
+			branchName1 = branchNameB;
+			branchName2 = branchNameA;
+			struct rename_entry *tmp = ren2;
+			ren2 = ren1;
+			ren1 = tmp;
+		}
+
+		ren1->dst_entry->processed = 1;
+		ren1->src_entry->processed = 1;
+
+		if ( ren1->processed )
+			continue;
+		ren1->processed = 1;
+
+		if ( ren2 ) {
+			// Renamed in 1 and renamed in 2
+			if ( strcmp(ren1->src, ren2->src) != 0 )
+				die("ren1.srcName != ren2.srcName");
+			ren2->dst_entry->processed = 1;
+			ren2->processed = 1;
+			if ( strcmp(ren1->dst, ren2->dst) != 0 ) {
+				output("CONFLICT (rename/rename): "
+				       "Rename %s->%s in branch %s "
+				       "rename %s->%s in %s",
+				       src->path, ren1->dst, branchName1,
+				       src->path, ren2->dst, branchName2);
+				cleanMerge = 0;
+				char *dstName1 = ren1->dst, *dstName2 = ren2->dst;
+				if ( path_list_has_path(currentDirectorySet, ren1->dst) ) {
+					dstName1 = uniquePath(ren1->dst, branchName1);
+					output("%s is a directory in %s adding as %s instead",
+					       ren1->dst, branchName2, dstName1);
+					removeFile(fp, 0, ren1->dst);
+				}
+				if ( path_list_has_path(currentDirectorySet, ren2->dst) ) {
+					dstName2 = uniquePath(ren2->dst, branchName2);
+					output("%s is a directory in %s adding as %s instead",
+					       ren2->dst, branchName1, dstName2);
+					removeFile(fp, 0, ren2->dst);
+				}
+				setIndexStages(fp, dstName1,
+					       NULL, 0,
+					       ren1->dst_sha, ren1->dst_mode,
+					       NULL, 0,
+					       1 /* clear */);
+				setIndexStages(fp, dstName2,
+					       NULL, 0,
+					       NULL, 0,
+					       ren2->dst_sha, ren2->dst_mode,
+					       1 /* clear */);
+			} else {
+				removeFile(fp, 1, ren1->src);
+				struct merge_file_info mfi;
+				mfi = mergeFile(ren1->src, ren1->src_sha, ren1->src_mode,
+						ren1->dst, ren1->dst_sha, ren1->dst_mode,
+						ren2->dst, ren2->dst_sha, ren2->dst_mode,
+						branchName1, branchName2);
+				if ( mfi.merge || !mfi.clean )
+					output("Renaming %s->%s", src->path, ren1->dst);
+
+				if ( mfi.merge )
+					output("Auto-merging %s", ren1->dst);
+
+				if ( !mfi.clean ) {
+					output("CONFLICT (content): merge conflict in %s",
+					       ren1->dst);
+					cleanMerge = 0;
+
+					if ( !index_only )
+						setIndexStages(fp,
+							       ren1->dst,
+							       ren1->src_sha, ren1->src_mode,
+							       ren1->dst_sha, ren1->dst_mode,
+							       ren2->dst_sha, ren2->dst_mode,
+							       1 /* clear */);
+				}
+				updateFile(fp, mfi.clean, mfi.sha, mfi.mode, ren1->dst);
+			}
+		} else {
+			// Renamed in 1, maybe changed in 2
+			removeFile(fp, 1, ren1->src);
+
+			unsigned char srcShaOtherBranch[20], dstShaOtherBranch[20];
+			unsigned srcModeOtherBranch, dstModeOtherBranch;
+
+			int stage = renamesA == renames1 ? 3: 2;
+
+			memcpy(srcShaOtherBranch, ren1->src_entry->stages[stage].sha, 20);
+			srcModeOtherBranch = ren1->src_entry->stages[stage].mode;
+
+			memcpy(dstShaOtherBranch, ren1->dst_entry->stages[stage].sha, 20);
+			dstModeOtherBranch = ren1->dst_entry->stages[stage].mode;
+
+			int tryMerge = 0;
+			char *newPath;
+			struct rename_entry *dst2;
+
+			if ( path_list_has_path(currentDirectorySet, ren1->dst) ) {
+				newPath = uniquePath(ren1->dst, branchName1);
+				output("CONFLICT (rename/directory): Rename %s->%s in %s "
+				       " directory %s added in %s",
+				       ren1->src, ren1->dst, branchName1,
+				       ren1->dst, branchName2);
+				output("Renaming %s to %s instead", ren1->src, newPath);
+				cleanMerge = 0;
+				removeFile(fp, 0, ren1->dst);
+				updateFile(fp, 0, ren1->dst_sha, ren1->dst_mode, newPath);
+			} else if ( memcmp(srcShaOtherBranch, null_sha1, 20) == 0 ) {
+				output("CONFLICT (rename/delete): Rename %s->%s in %s "
+				       "and deleted in %s",
+				       ren1->src, ren1->dst, branchName1,
+				       branchName2);
+				cleanMerge = 0;
+				updateFile(fp, 0, ren1->dst_sha, ren1->dst_mode, ren1->dst);
+			} else if ( memcmp(dstShaOtherBranch, null_sha1, 20) != 0 ) {
+				newPath = uniquePath(ren1->dst, branchName2);
+				output("CONFLICT (rename/add): Rename %s->%s in %s. "
+				       "%s added in %s",
+				       ren1->src, ren1->dst, branchName1,
+				       ren1->dst, branchName2);
+				output("Adding as %s instead", newPath);
+				updateFile(fp, 0, dstShaOtherBranch, dstModeOtherBranch, newPath);
+				cleanMerge = 0;
+				tryMerge = 1;
+			} else if ( (dst2 = find_rename_bydst(renames2, ren1->dst)) ) {
+				char *newPath1 = uniquePath(ren1->dst, branchName1);
+				char *newPath2 = uniquePath(dst2->dst, branchName2);
+				output("CONFLICT (rename/rename): Rename %s->%s in %s. "
+				       "Rename %s->%s in %s",
+				       ren1->src, ren1->dst, branchName1,
+				       dst2->src, dst2->dst, branchName2);
+				output("Renaming %s to %s and %s to %s instead",
+				       ren1->src, newPath1, dst2->src, newPath2);
+				removeFile(fp, 0, ren1->dst);
+				updateFile(fp, 0, ren1->dst_sha, ren1->dst_mode, newPath1);
+				updateFile(fp, 0, dst2->dst_sha, dst2->dst_mode, newPath2);
+				dst2->processed = 1;
+				cleanMerge = 0;
+			} else
+				tryMerge = 1;
+
+			if ( tryMerge ) {
+				char *oname = ren1->src;
+				char *aname = ren1->dst;
+				char *bname = ren1->src;
+				unsigned char osha[20], asha[20], bsha[20];
+				unsigned omode = ren1->src_mode;
+				unsigned amode = ren1->dst_mode;
+				unsigned bmode = srcModeOtherBranch;
+				memcpy(osha, ren1->src_sha, 20);
+				memcpy(asha, ren1->dst_sha, 20);
+				memcpy(bsha, srcShaOtherBranch, 20);
+				const char *aBranch = branchName1;
+				const char *bBranch = branchName2;
+
+				if ( renamesA != renames1 ) {
+					memswp(&aname, &bname, sizeof(aname));
+					memswp(asha, bsha, 20);
+					memswp(&aBranch, &bBranch, sizeof(aBranch));
+				}
+				struct merge_file_info mfi;
+				mfi = mergeFile(oname, osha, omode,
+						aname, asha, amode,
+						bname, bsha, bmode,
+						aBranch, bBranch);
+
+				if ( mfi.merge || !mfi.clean )
+					output("Renaming %s => %s", ren1->src, ren1->dst);
+				if ( mfi.merge )
+					output("Auto-merging %s", ren1->dst);
+				if ( !mfi.clean ) {
+					output("CONFLICT (rename/modify): Merge conflict in %s",
+					       ren1->dst);
+					cleanMerge = 0;
+
+					if ( !index_only )
+						setIndexStages(fp,
+							       ren1->dst,
+							       osha, omode,
+							       asha, amode,
+							       bsha, bmode,
+							       1 /* clear */);
+				}
+				updateFile(fp, mfi.clean, mfi.sha, mfi.mode, ren1->dst);
+			}
+		}
+	}
+	path_list_clear(&srcNames, 0);
+	if (pclose(fp)) {
+		die("git update-index --index-info failed");
+	}
+	return cleanMerge;
+}
+
+static unsigned char *has_sha(const unsigned char *sha)
+{
+	return memcmp(sha, null_sha1, 20) == 0 ? NULL: (unsigned char *)sha;
+}
+
+static int sha_eq(const unsigned char *a, const unsigned char *b)
+{
+	if ( !a && !b )
+		return 2;
+	return a && b && memcmp(a, b, 20) == 0;
+}
+
+// Per entry merge function
+// ------------------------
+// Merge one cache entry.
+int processEntry(struct index_entry *entry,
+		 const char *branch1Name,
+		 const char *branch2Name)
+{
+	//    printf("processing entry, clean cache: %s\n", index_only ? "yes": "no");
+	//    print_index_entry("\tpath: ", entry);
+	int cleanMerge = 1;
+	const char *path = entry->path;
+	unsigned char *oSha = has_sha(entry->stages[1].sha);
+	unsigned char *aSha = has_sha(entry->stages[2].sha);
+	unsigned char *bSha = has_sha(entry->stages[3].sha);
+	unsigned oMode = entry->stages[1].mode;
+	unsigned aMode = entry->stages[2].mode;
+	unsigned bMode = entry->stages[3].mode;
+	FILE *fp = git_update_index_pipe();
+
+	if ( oSha && (!aSha || !bSha) ) {
+		//
+		// Case A: Deleted in one
+		//
+		if ( (!aSha && !bSha) ||
+		     (sha_eq(aSha, oSha) && !bSha) ||
+		     (!aSha && sha_eq(bSha, oSha)) ) {
+			// Deleted in both or deleted in one and unchanged in the other
+			if ( aSha )
+				output("Removing %s", path);
+			removeFile(fp, 1, path);
+		} else {
+			// Deleted in one and changed in the other
+			cleanMerge = 0;
+			if ( !aSha ) {
+				output("CONFLICT (delete/modify): %s deleted in %s "
+				       "and modified in %s. Version %s of %s left in tree.",
+				       path, branch1Name,
+				       branch2Name, branch2Name, path);
+				updateFile(fp, 0, bSha, bMode, path);
+			} else {
+				output("CONFLICT (delete/modify): %s deleted in %s "
+				       "and modified in %s. Version %s of %s left in tree.",
+				       path, branch2Name,
+				       branch1Name, branch1Name, path);
+				updateFile(fp, 0, aSha, aMode, path);
+			}
+		}
+
+	} else if ( (!oSha && aSha && !bSha) ||
+		    (!oSha && !aSha && bSha) ) {
+		//
+		// Case B: Added in one.
+		//
+		const char *addBranch;
+		const char *otherBranch;
+		unsigned mode;
+		const unsigned char *sha;
+		const char *conf;
+
+		if ( aSha ) {
+			addBranch = branch1Name;
+			otherBranch = branch2Name;
+			mode = aMode;
+			sha = aSha;
+			conf = "file/directory";
+		} else {
+			addBranch = branch2Name;
+			otherBranch = branch1Name;
+			mode = bMode;
+			sha = bSha;
+			conf = "directory/file";
+		}
+		if ( path_list_has_path(currentDirectorySet, path) ) {
+			cleanMerge = 0;
+			const char *newPath = uniquePath(path, addBranch);
+			output("CONFLICT (%s): There is a directory with name %s in %s. "
+			       "Adding %s as %s",
+			       conf, path, otherBranch, path, newPath);
+			removeFile(fp, 0, path);
+			updateFile(fp, 0, sha, mode, newPath);
+		} else {
+			output("Adding %s", path);
+			updateFile(fp, 1, sha, mode, path);
+		}
+	} else if ( !oSha && aSha && bSha ) {
+		//
+		// Case C: Added in both (check for same permissions).
+		//
+		if ( sha_eq(aSha, bSha) ) {
+			if ( aMode != bMode ) {
+				cleanMerge = 0;
+				output("CONFLICT: File %s added identically in both branches, "
+				       "but permissions conflict %06o->%06o",
+				       path, aMode, bMode);
+				output("CONFLICT: adding with permission: %06o", aMode);
+				updateFile(fp, 0, aSha, aMode, path);
+			} else {
+				// This case is handled by git-read-tree
+				assert(0 && "This case must be handled by git-read-tree");
+			}
+		} else {
+			cleanMerge = 0;
+			const char *newPath1 = uniquePath(path, branch1Name);
+			const char *newPath2 = uniquePath(path, branch2Name);
+			output("CONFLICT (add/add): File %s added non-identically "
+			       "in both branches. Adding as %s and %s instead.",
+			       path, newPath1, newPath2);
+			removeFile(fp, 0, path);
+			updateFile(fp, 0, aSha, aMode, newPath1);
+			updateFile(fp, 0, bSha, bMode, newPath2);
+		}
+
+	} else if ( oSha && aSha && bSha ) {
+		//
+		// case D: Modified in both, but differently.
+		//
+		output("Auto-merging %s\n", path);
+		struct merge_file_info mfi;
+		mfi = mergeFile(path, oSha, oMode,
+				path, aSha, aMode,
+				path, bSha, bMode,
+				branch1Name, branch2Name);
+
+		if ( mfi.clean )
+			updateFile(fp, 1, mfi.sha, mfi.mode, path);
+		else {
+			cleanMerge = 0;
+			output("CONFLICT (content): Merge conflict in %s", path);
+
+			if ( index_only )
+				updateFile(fp, 0, mfi.sha, mfi.mode, path);
+			else
+				updateFileExt(fp, mfi.sha, mfi.mode, path,
+					      0 /* updateCache */, 1 /* updateWd */);
+		}
+	} else
+		die("Fatal merge failure, shouldn't happen.");
+
+	if (pclose(fp))
+		die("updating entry failed in git update-index");
+	return cleanMerge;
+}
+
+struct merge_tree_result merge_trees(struct tree *head,
+				     struct tree *merge,
+				     struct tree *common,
+				     const char *branch1Name,
+				     const char *branch2Name)
+{
+	int code;
+	struct merge_tree_result result = { NULL, 0 };
+	if ( !memcmp(common->object.sha1, merge->object.sha1, 20) ) {
+		output("Already uptodate!");
+		result.tree = head;
+		result.clean = 1;
+		return result;
+	}
+
+	code = git_merge_trees(index_only ? "-i": "-u", common, head, merge);
+
+	if ( code != 0 )
+		die("merging of trees %s and %s failed",
+		    sha1_to_hex(head->object.sha1),
+		    sha1_to_hex(merge->object.sha1));
+
+	result.tree = git_write_tree();
+
+	if ( !result.tree ) {
+		struct path_list *filesM = NULL, *dirsM = NULL;
+
+		getFilesAndDirs(head, &currentFileSet, &currentDirectorySet);
+		getFilesAndDirs(merge, &filesM, &dirsM);
+
+		path_list_union_update(&currentFileSet, filesM);
+		path_list_union_update(&currentDirectorySet, dirsM);
+
+		struct index_entry *entries = unmergedCacheEntries();
+		struct rename_entry *renamesHead, *renamesMerge;
+		renamesHead  = getRenames(head, common, head, merge, &entries);
+		renamesMerge = getRenames(merge, common, head, merge, &entries);
+		result.clean = processRenames(renamesHead, renamesMerge,
+					      branch1Name, branch2Name);
+		struct index_entry *e;
+		for ( e = entries; e; e = e->next ) {
+			if ( e->processed )
+				continue;
+			if ( !processEntry(e, branch1Name, branch2Name) )
+				result.clean = 0;
+			if ( result.clean || index_only )
+				result.tree = git_write_tree();
+			else
+				result.tree = NULL;
+		}
+		free_rename_entries(&renamesMerge);
+		free_rename_entries(&renamesHead);
+		free_index_entries(&entries);
+	} else {
+		result.clean = 1;
+		printf("merging of trees %s and %s resulted in %s\n",
+		       sha1_to_hex(head->object.sha1),
+		       sha1_to_hex(merge->object.sha1),
+		       sha1_to_hex(result.tree->object.sha1));
+	}
+
+	return result;
+}
+
+static void collect_nodes(struct node *node, struct node_list **res)
+{
+    node_list_insert(node, res);
+    struct node_list *p;
+    for ( p = node->parents; p; p = p->next )
+	collect_nodes(node, res);
+}
+
+struct node_list *reachable_nodes(struct node *n1, struct node *n2)
+{
+    struct node_list *res = NULL;
+    collect_nodes(n1, &res);
+    collect_nodes(n2, &res);
+    return res;
+}
+
+struct graph *graph_build(struct node_list *commits)
+{
+	struct graph *graph = malloc(sizeof(struct graph));
+	memset(graph->commits, 0, sizeof(graph->commits));
+
+	char cmd[256];
+	strcpy(cmd, "git-rev-list --parents");
+	struct node_list *cp;
+	for_each_node_list(cp,commits) {
+		graph_add_node(graph, cp->node);
+		strcat(cmd, " ");
+		strcat(cmd, node_hex_sha1(cp->node));
+	}
+	assert(strlen(cmd) < sizeof(cmd));
+
+	FILE *fp = popen(cmd, "r");
+	if (!fp)
+		die("%s failed: %s", cmd, strerror(errno));
+	while (!feof(fp)) {
+		unsigned char sha[20];
+		int ch;
+		if (fget_sha1(sha, fp, &ch))
+			break;
+		if (EOF == ch)
+			break;
+		// a commit
+		struct node *node = graph_node_bysha(graph, sha);
+		if (!node)
+		{
+			node = node_alloc(lookup_commit(sha));
+			graph_add_node(graph, node);
+		}
+		// ...and its parents. I assume a parent cannot be mentioned
+		// before the children.
+		struct node_list *parents = NULL;
+		while ('\n' != ch) {
+			if (fget_sha1(sha, fp, &ch)) {
+				die("invalid output from %s, "
+				    "sha1 (parents) expected",
+				    cmd);
+				break;
+			}
+			if (EOF == ch)
+				break;
+			struct node *pn = graph_node_bysha(graph, sha);
+			if (!pn) {
+				pn = node_alloc(lookup_commit(sha));
+				graph_add_node(graph, pn);
+			}
+			node_list_insert(pn, &parents);
+		}
+		node_set_parents(node, parents);
+	}
+	pclose(fp);
+	return graph;
+}
+
+static int get_sha1_0(const char *ref, unsigned char *sha)
+{
+	size_t n = strlen(ref);
+	char *t = xmalloc(n + 4);
+	memcpy(t, ref, n);
+	strcpy(t + n, "^0");
+	int rc = get_sha1(t, sha);
+	free(t);
+	return rc;
+}
+
+int main(int argc, char *argv[])
+{
+	static const char *bases[2];
+	static unsigned bases_count = 0;
+
+	original_index_file = getenv("GIT_INDEX_FILE");
+
+	if (!original_index_file)
+		original_index_file = strdup(git_path("index"));
+
+	temporary_index_file = strdup(git_path("mrg-rcrsv-tmp-idx"));
+
+	if (argc < 4)
+		die("Usage: %s <base>... -- <head> <remote> ...\n", argv[0]);
+
+	int i;
+	for (i = 1; i < argc; ++i) {
+		if (!strcmp(argv[i], "--"))
+			break;
+		if (bases_count < sizeof(bases)/sizeof(*bases))
+			bases[bases_count++] = argv[i];
+	}
+	if (argc - i != 3) /* "--" "<head>" "<remote>" */
+		die("Not handling anything other than two heads merge.");
+
+	unsigned char sha1[20], sha2[20];
+	const char *branch1, *branch2;
+
+	branch1 = argv[++i];
+	if (get_sha1_0(branch1, sha1) != 0)
+		die("invalid first branch %s", branch1);
+
+	branch2 = argv[++i];
+	if (get_sha1_0(branch2, sha2) != 0)
+		die("invalid second branch %s", branch2);
+
+	printf("Merging %s with %s\n", branch1, branch2);
+
+	struct merge_result result;
+	struct node *h1 = node_alloc(lookup_commit(sha1));
+	struct node *h2 = node_alloc(lookup_commit(sha2));
+
+	if (bases_count == 1) {
+		unsigned char shabase[20];
+		if (get_sha1_0(bases[0], shabase) != 0)
+			die("invalid base commit %s", bases[0]);
+		struct node *ancestor = node_alloc(lookup_commit(shabase));
+		result = merge(h1, h2, branch1, branch2, NULL, 0, ancestor);
+	} else {
+		struct node_list *commits = NULL;
+		node_list_insert(h1, &commits);
+		node_list_insert(h2, &commits->next);
+		struct graph *graph = graph_build(commits);
+		result = merge(h1, h2, branch1, branch2, graph, 0, NULL);
+	}
+	return result.clean ? 0: 1;
+}
+
+// vim: sw=8 noet
diff --git a/path-list.c b/path-list.c
new file mode 100644
index 0000000..f969116
--- /dev/null
+++ b/path-list.c
@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include "cache.h"
+#include "path-list.h"
+
+static struct path_list *path_list_pool = NULL;
+
+struct path_list *path_list_insert(char *path, struct path_list **list)
+{
+    struct path_list *e;
+    if ( path_list_pool ) {
+        e = path_list_pool;
+        path_list_pool = path_list_pool->next;
+    } else
+        e = xmalloc(sizeof(struct path_list));
+    e->next = *list;
+    e->path = path;
+    *list = e;
+    return e;
+}
+
+void path_list_clear(struct path_list **list, int free_path)
+{
+    while ( *list ) {
+        struct path_list *next = (*list)->next;
+	if (free_path)
+	    free((*list)->path);
+        (*list)->path = NULL;
+        (*list)->next = path_list_pool;
+        path_list_pool = *list;
+        *list = next;
+    }
+}
+
+int path_list_count(const struct path_list *list)
+{
+    int n = 0;
+    for ( ; list; list = list->next )
+        ++n;
+    return n;
+}
+
+int path_list_has_path(const struct path_list *list, const char *path)
+{
+    for ( ; list; list = list->next )
+        if ( strcmp(path, list->path) == 0 )
+            return 1;
+    return 0;
+}
+
+// in place
+void path_list_union_update(struct path_list **dst, const struct path_list *src)
+{
+    const struct path_list *i = src;
+    while ( i ) {
+        if ( !path_list_has_path(*dst, i->path) )
+	    path_list_insert(i->path, dst);
+	i = i->next;
+    }
+}
+
+void print_path_list(const char *text, const struct path_list *p)
+{
+    if ( text )
+        printf("%s\n", text);
+    for ( ; p; p = p->next )
+        printf("%s\n", p->path);
+}
+
diff --git a/path-list.h b/path-list.h
new file mode 100644
index 0000000..7e79f17
--- /dev/null
+++ b/path-list.h
@@ -0,0 +1,31 @@
+#ifndef _PATH_LIST_H_
+#define _PATH_LIST_H_
+
+struct path_list
+{
+    struct path_list *next;
+    char *path;
+};
+
+#define for_each_path(p,list) for ( p = (list); p; p = p->next )
+
+void print_path_list(const char *text, const struct path_list *p);
+
+static inline struct path_list *path_list_shift(struct path_list **list)
+{
+    struct path_list *cur = *list;
+    *list = cur->next;
+    return cur;
+}
+
+int path_list_count(const struct path_list *list);
+int path_list_has_path(const struct path_list *list, const char *path);
+void path_list_union_update(struct path_list **dst, const struct path_list *src);
+void path_list_clear(struct path_list **list, int free_path);
+static inline void path_list_free(struct path_list *list)
+{
+    path_list_clear(&list, 1);
+}
+struct path_list *path_list_insert(char *path, struct path_list **list);
+
+#endif /* _PATH_LIST_H_ */

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-27 15:48 CFT: merge-recursive in C (updated) Alex Riesen
@ 2006-06-27 16:42 ` Johannes Schindelin
  2006-06-27 22:32   ` Alex Riesen
  2006-06-27 18:22 ` CFT: merge-recursive in C (updated) Linus Torvalds
  1 sibling, 1 reply; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-27 16:42 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

Hi,

On Tue, 27 Jun 2006, Alex Riesen wrote:

> - use a pipe to "git-update-index --index-info" instead of using command 
> line

It might be an even better idea to call the cache functions directly. But 
in that case, you definitely want a function set_index_file() in 
environment.c, as well as a way to invalidate the active cache. Something 
like

	if (active_cache) {
		free(active_cache);
		active_cache = NULL;
		active_nr = active_alloc = 0;
		free(active_cache_tree);
		active_cache_tree = NULL;
		read_cache();
	}

> Good news is that it is faster: 6min vs 10min. Bad news is that it is 
> still not enough for me and it is only on Linux (Windows will be slower, 
> still testing), uses an awful lot of memory and CPU.

No fear. I am sure we'll get there.

> Path list optimization should be next (and I'll be glad if someone does 
> this before me).

See below.

> Also graph algos have a greate optimization potential (intersection, all 
> parents of a node, node_by_sha).

The longer I look at it, the more I am convinced that this graph thing is 
not necessary. I was hesitant at first that addCommonRoot() would not be 
possible, but nobody says we cannot override commit->parents.

I have some commits in a private branch to split out get_merge_bases() 
from merge-base.c, so I'll try that next.

BTW, before actually finishing this, we might want to do away with 
capitalizedFunctionNames and 4-space indent.

And here comes my patch (on top of what you sent) to avoid linked lists:

 merge-recursive.c |   61 ++++++++++++-------------
 path-list.c       |  128 +++++++++++++++++++++++++++++++++++------------------
 path-list.h       |   31 +++++++------
 3 files changed, 129 insertions(+), 91 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 547150e..b608a49 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -75,8 +75,8 @@ void print_index_entry(const char *text,
 }
 #endif
 
-static struct path_list *currentFileSet;
-static struct path_list *currentDirectorySet;
+static struct path_list currentFileSet = {NULL, 0, 0};
+static struct path_list currentDirectorySet = {NULL, 0, 0};
 
 static int output_indent = 0;
 
@@ -297,8 +297,8 @@ static int read_tree_rt(struct tree *tre
 
 struct files_and_dirs
 {
-	struct path_list **files;
-	struct path_list **dirs;
+	struct path_list *files;
+	struct path_list *dirs;
 };
 
 static int save_files_dirs(const char *sha1,
@@ -309,19 +309,16 @@ static int save_files_dirs(const char *s
 	struct files_and_dirs *data = data_;
 	char *path = strdup(path_);
 
-	if (S_ISDIR(mode)) {
+	if (S_ISDIR(mode))
 		path_list_insert(path, data->dirs);
-		data->dirs = &(*data->dirs)->next;
-	} else {
+	else
 		path_list_insert(path, data->files);
-		data->files = &(*data->files)->next;
-	}
 	return READ_TREE_RECURSIVE;
 }
 
 int getFilesAndDirs(struct tree *tree,
-		    struct path_list **files,
-		    struct path_list **dirs)
+		    struct path_list *files,
+		    struct path_list *dirs)
 {
 	struct files_and_dirs data;
 	path_list_clear(files, 1);
@@ -330,7 +327,7 @@ int getFilesAndDirs(struct tree *tree,
 	data.dirs = dirs;
 	if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 )
 		return 0;
-	return path_list_count(*files) + path_list_count(*dirs);
+	return path_list_count(files) + path_list_count(dirs);
 }
 
 struct index_entry *index_entry_find(struct index_entry *ents, const char *path)
@@ -714,8 +711,8 @@ char *uniquePath(const char *path, const
 			*p = '_';
 	int suffix = 0;
 	struct stat st;
-	while ( path_list_has_path(currentFileSet, newpath) ||
-		path_list_has_path(currentDirectorySet, newpath) ||
+	while ( path_list_has_path(&currentFileSet, newpath) ||
+		path_list_has_path(&currentDirectorySet, newpath) ||
 		lstat(newpath, &st) == 0 ) {
 		sprintf(p, "_%d", suffix++);
 	}
@@ -980,23 +977,21 @@ int processRenames(struct rename_entry *
 	//	   branchNameA, renamesA ? renamesA->src: "(none)",
 	//	   branchNameB, renamesB ? renamesB->dst: "(none)");
 
-	struct path_list *srcNames = NULL;
+	struct path_list srcNames = {NULL, 0, 0};
 	const struct rename_entry *sre;
+	char **src;
 
 	for (sre = renamesA; sre; sre = sre->next)
-		if (!path_list_has_path(srcNames, sre->src))
-			path_list_insert(sre->src, &srcNames);
+		path_list_insert(sre->src, &srcNames);
 	for (sre = renamesB; sre; sre = sre->next)
-		if (!path_list_has_path(srcNames, sre->src))
-			path_list_insert(sre->src, &srcNames);
+		path_list_insert(sre->src, &srcNames);
 
 	FILE *fp = git_update_index_pipe();
-	struct path_list *src;
-	for_each_path(src,srcNames) {
+	for_each_path(src,&srcNames) {
 		struct rename_entry *renames1, *renames2, *ren1, *ren2;
 		const char *branchName1, *branchName2;
-		ren1 = find_rename_bysrc(renamesA, src->path);
-		ren2 = find_rename_bysrc(renamesB, src->path);
+		ren1 = find_rename_bysrc(renamesA, *src);
+		ren2 = find_rename_bysrc(renamesB, *src);
 		if ( ren1 ) {
 			renames1 = renamesA;
 			renames2 = renamesB;
@@ -1029,17 +1024,17 @@ int processRenames(struct rename_entry *
 				output("CONFLICT (rename/rename): "
 				       "Rename %s->%s in branch %s "
 				       "rename %s->%s in %s",
-				       src->path, ren1->dst, branchName1,
-				       src->path, ren2->dst, branchName2);
+				       *src, ren1->dst, branchName1,
+				       *src, ren2->dst, branchName2);
 				cleanMerge = 0;
 				char *dstName1 = ren1->dst, *dstName2 = ren2->dst;
-				if ( path_list_has_path(currentDirectorySet, ren1->dst) ) {
+				if ( path_list_has_path(&currentDirectorySet, ren1->dst) ) {
 					dstName1 = uniquePath(ren1->dst, branchName1);
 					output("%s is a directory in %s adding as %s instead",
 					       ren1->dst, branchName2, dstName1);
 					removeFile(fp, 0, ren1->dst);
 				}
-				if ( path_list_has_path(currentDirectorySet, ren2->dst) ) {
+				if ( path_list_has_path(&currentDirectorySet, ren2->dst) ) {
 					dstName2 = uniquePath(ren2->dst, branchName2);
 					output("%s is a directory in %s adding as %s instead",
 					       ren2->dst, branchName1, dstName2);
@@ -1063,7 +1058,7 @@ int processRenames(struct rename_entry *
 						ren2->dst, ren2->dst_sha, ren2->dst_mode,
 						branchName1, branchName2);
 				if ( mfi.merge || !mfi.clean )
-					output("Renaming %s->%s", src->path, ren1->dst);
+					output("Renaming %s->%s", *src, ren1->dst);
 
 				if ( mfi.merge )
 					output("Auto-merging %s", ren1->dst);
@@ -1102,7 +1097,7 @@ int processRenames(struct rename_entry *
 			char *newPath;
 			struct rename_entry *dst2;
 
-			if ( path_list_has_path(currentDirectorySet, ren1->dst) ) {
+			if ( path_list_has_path(&currentDirectorySet, ren1->dst) ) {
 				newPath = uniquePath(ren1->dst, branchName1);
 				output("CONFLICT (rename/directory): Rename %s->%s in %s "
 				       " directory %s added in %s",
@@ -1283,7 +1278,7 @@ int processEntry(struct index_entry *ent
 			sha = bSha;
 			conf = "directory/file";
 		}
-		if ( path_list_has_path(currentDirectorySet, path) ) {
+		if ( path_list_has_path(&currentDirectorySet, path) ) {
 			cleanMerge = 0;
 			const char *newPath = uniquePath(path, addBranch);
 			output("CONFLICT (%s): There is a directory with name %s in %s. "
@@ -1379,13 +1374,13 @@ struct merge_tree_result merge_trees(str
 	result.tree = git_write_tree();
 
 	if ( !result.tree ) {
-		struct path_list *filesM = NULL, *dirsM = NULL;
+		struct path_list filesM = {NULL, 0, 0}, dirsM = {NULL, 0, 0};
 
 		getFilesAndDirs(head, &currentFileSet, &currentDirectorySet);
 		getFilesAndDirs(merge, &filesM, &dirsM);
 
-		path_list_union_update(&currentFileSet, filesM);
-		path_list_union_update(&currentDirectorySet, dirsM);
+		path_list_union_update(&currentFileSet, &filesM);
+		path_list_union_update(&currentDirectorySet, &dirsM);
 
 		struct index_entry *entries = unmergedCacheEntries();
 		struct rename_entry *renamesHead, *renamesMerge;
diff --git a/path-list.c b/path-list.c
index f969116..fbfc103 100644
--- a/path-list.c
+++ b/path-list.c
@@ -2,67 +2,109 @@ #include <stdio.h>
 #include "cache.h"
 #include "path-list.h"
 
-static struct path_list *path_list_pool = NULL;
-
-struct path_list *path_list_insert(char *path, struct path_list **list)
+/* if there is no exact match, point to the index where the entry could be
+ * inserted */
+static int get_entry_index(const struct path_list *container, const char *path,
+		int *exact_match)
 {
-    struct path_list *e;
-    if ( path_list_pool ) {
-        e = path_list_pool;
-        path_list_pool = path_list_pool->next;
-    } else
-        e = xmalloc(sizeof(struct path_list));
-    e->next = *list;
-    e->path = path;
-    *list = e;
-    return e;
+	int left = -1, right = container->nr;
+
+	while (left + 1 < right) {
+		int middle = (left + right) / 2;
+		int compare = strcmp(path, container->paths[middle]);
+		if (compare < 0)
+			right = middle;
+		else if (compare > 0)
+			left = middle;
+		else {
+			*exact_match = 1;
+			return middle;
+		}
+	}
+
+	*exact_match = 0;
+	return right;
 }
 
-void path_list_clear(struct path_list **list, int free_path)
+/* returns -1-index if already exists */
+static int add_entry(struct path_list *container, const char *path)
 {
-    while ( *list ) {
-        struct path_list *next = (*list)->next;
-	if (free_path)
-	    free((*list)->path);
-        (*list)->path = NULL;
-        (*list)->next = path_list_pool;
-        path_list_pool = *list;
-        *list = next;
-    }
+	int exact_match;
+	int index = get_entry_index(container, path, &exact_match);
+
+	if (exact_match)
+		return -1 - index;
+
+	if (container->nr + 1 >= container->alloc) {
+		container->alloc += 32;
+		container->paths = realloc(container->paths,
+				container->alloc * sizeof(char *));
+	}
+	if (index < container->nr)
+		memmove(container->paths + index + 1,
+				container->paths + index,
+				(container->nr - index) * sizeof(char *));
+	container->paths[index] = strdup(path);
+	container->nr++;
+
+	return index;
 }
 
-int path_list_count(const struct path_list *list)
+char *path_list_insert(char *path, struct path_list *list)
 {
-    int n = 0;
-    for ( ; list; list = list->next )
-        ++n;
-    return n;
+	int index = add_entry(list, path);
+
+	if (index < 0)
+		index = 1 - index;
+
+	return list->paths[index];
 }
 
 int path_list_has_path(const struct path_list *list, const char *path)
 {
-    for ( ; list; list = list->next )
-        if ( strcmp(path, list->path) == 0 )
-            return 1;
-    return 0;
+	int exact_match;
+	get_entry_index(list, path, &exact_match);
+	return exact_match;
 }
 
 // in place
-void path_list_union_update(struct path_list **dst, const struct path_list *src)
+void path_list_union_update(struct path_list *dst, const struct path_list *src)
 {
-    const struct path_list *i = src;
-    while ( i ) {
-        if ( !path_list_has_path(*dst, i->path) )
-	    path_list_insert(i->path, dst);
-	i = i->next;
-    }
+	char **new_paths;
+	int i = 0, j = 0, nr = 0, alloc = dst->nr + dst->nr;
+
+	new_paths = xcalloc(sizeof(char *), alloc);
+
+	while (i < dst->nr || j < src->nr) {
+		char **entry = new_paths + nr++;
+		if (i == dst->nr)
+			*entry = src->paths[j++];
+		else if (j == src->nr)
+			*entry = dst->paths[i++];
+		else {
+			int compare = strcmp(dst->paths[i], src->paths[j]);
+			if (compare > 0)
+				*entry = src->paths[j++];
+			else {
+				*entry = dst->paths[i++];
+				if (!compare)
+					free(src->paths[j++]);
+			}
+		}
+	}
+
+	free(dst->paths);
+	dst->paths = new_paths;
+	dst->nr = nr;
+	dst->alloc = alloc;
 }
 
 void print_path_list(const char *text, const struct path_list *p)
 {
-    if ( text )
-        printf("%s\n", text);
-    for ( ; p; p = p->next )
-        printf("%s\n", p->path);
+	int i;
+	if ( text )
+		printf("%s\n", text);
+	for (i = 0; i < p->nr; i++)
+		printf("%s\n", p->paths[i]);
 }
 
diff --git a/path-list.h b/path-list.h
index 7e79f17..a12c7a4 100644
--- a/path-list.h
+++ b/path-list.h
@@ -3,29 +3,30 @@ #define _PATH_LIST_H_
 
 struct path_list
 {
-    struct path_list *next;
-    char *path;
+    char **paths;
+    unsigned int nr, alloc;
 };
 
-#define for_each_path(p,list) for ( p = (list); p; p = p->next )
+#define for_each_path(p,list) for ( p = (list)->paths; p != (list)->paths + (list)->nr; p++ )
 
 void print_path_list(const char *text, const struct path_list *p);
 
-static inline struct path_list *path_list_shift(struct path_list **list)
-{
-    struct path_list *cur = *list;
-    *list = cur->next;
-    return cur;
-}
+#define path_list_count(list) (list)->nr
 
-int path_list_count(const struct path_list *list);
 int path_list_has_path(const struct path_list *list, const char *path);
-void path_list_union_update(struct path_list **dst, const struct path_list *src);
-void path_list_clear(struct path_list **list, int free_path);
-static inline void path_list_free(struct path_list *list)
+void path_list_union_update(struct path_list *dst, const struct path_list *src);
+static inline void path_list_clear(struct path_list *list, int free_paths)
 {
-    path_list_clear(&list, 1);
+	if (list->paths) {
+		int i;
+		if (free_paths)
+			for (i = 0; i < list->nr; i++)
+				free(list->paths[i]);
+		free(list->paths);
+	}
+	list->paths = NULL;
+	list->nr = list->alloc = 0;
 }
-struct path_list *path_list_insert(char *path, struct path_list **list);
+char *path_list_insert(char *path, struct path_list *list);
 
 #endif /* _PATH_LIST_H_ */
-- 
1.4.1.rc1.g3583-dirty

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-27 16:42 ` Johannes Schindelin
@ 2006-06-27 22:32   ` Alex Riesen
  2006-06-28  9:34     ` Alex Riesen
                       ` (3 more replies)
  0 siblings, 4 replies; 32+ messages in thread
From: Alex Riesen @ 2006-06-27 22:32 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

Johannes Schindelin, Tue, Jun 27, 2006 18:42:51 +0200:
> > - use a pipe to "git-update-index --index-info" instead of using command 
> > line
> 
> It might be an even better idea to call the cache functions directly. But 
> in that case, you definitely want a function set_index_file() in 
> environment.c, as well as a way to invalidate the active cache. Something 
> like
> 
> 	if (active_cache) {
> 		free(active_cache);
> 		active_cache = NULL;
> 		active_nr = active_alloc = 0;
> 		free(active_cache_tree);
> 		active_cache_tree = NULL;
> 		read_cache();
> 	}

We should check how will that work across recursive calls to merge
(that long function in the middle of merge-recursive.c), but yes,
it's certanly worth a try.

> > Path list optimization should be next (and I'll be glad if someone does 
> > this before me).
> 
> See below.
> 

Aah, thanks. Merged, tried, tested, left in patch.
The reallocs can cause some undesirable heap fragmentation, don't you
think?

> > Also graph algos have a greate optimization potential (intersection, all 
> > parents of a node, node_by_sha).
> 
> The longer I look at it, the more I am convinced that this graph thing is 
> not necessary. I was hesitant at first that addCommonRoot() would not be 
> possible, but nobody says we cannot override commit->parents.

I tried to replace that code completely with a call to git-merge-base
(it does not happen too often). So far it passed all tests.

> I have some commits in a private branch to split out get_merge_bases() 
> from merge-base.c, so I'll try that next.

Thanks, that'd be nice.

> BTW, before actually finishing this, we might want to do away with 
> capitalizedFunctionNames and 4-space indent.

4-space indent should be gone by now, the names pending (they were
important in initial debugging after conversion).

And I almost forgot about using git-merge-tree instead of
"git-read-tree -m". I hope this message will remind me tomorrow
to think more about that.

Updated patch (your path_list and git-merge-base instead of graph's
common ancestors):


diff --git a/Makefile b/Makefile
index cde619c..660f09b 100644
--- a/Makefile
+++ b/Makefile
@@ -163,7 +163,8 @@ PROGRAMS = \
 	git-upload-pack$X git-verify-pack$X \
 	git-symbolic-ref$X \
 	git-name-rev$X git-pack-redundant$X git-repo-config$X git-var$X \
-	git-describe$X git-merge-tree$X git-blame$X git-imap-send$X
+	git-describe$X git-merge-tree$X git-blame$X git-imap-send$X \
+	git-merge-recur$X
 
 BUILT_INS = git-log$X git-whatchanged$X git-show$X git-update-ref$X \
 	git-count-objects$X git-diff$X git-push$X git-mailsplit$X \
@@ -595,6 +596,10 @@ git-http-push$X: revision.o http.o http-
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
 
+git-merge-recur$X: merge-recursive.o graph.o path-list.o $(LIB_FILE)
+	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
+		$(LIBS)
+
 $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
 $(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
 $(DIFF_OBJS): diffcore.h
diff --git a/git-merge.sh b/git-merge.sh
index fc25f8d..7d81122 100755
--- a/git-merge.sh
+++ b/git-merge.sh
@@ -9,7 +9,7 @@ USAGE='[-n] [--no-commit] [--squash] [-s
 LF='
 '
 
-all_strategies='recursive octopus resolve stupid ours'
+all_strategies='recur recursive octopus resolve stupid ours'
 default_twohead_strategies='recursive'
 default_octopus_strategies='octopus'
 no_trivial_merge_strategies='ours'
@@ -17,7 +17,7 @@ use_strategies=
 
 index_merge=t
 if test "@@NO_PYTHON@@"; then
-	all_strategies='resolve octopus stupid ours'
+	all_strategies='recur resolve octopus stupid ours'
 	default_twohead_strategies='resolve'
 fi
 
diff --git a/graph.c b/graph.c
new file mode 100644
index 0000000..fa2bfee
--- /dev/null
+++ b/graph.c
@@ -0,0 +1,252 @@
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "cache.h"
+#include "commit.h"
+#include "graph.h"
+
+// does not belong here
+struct tree *git_write_tree()
+{
+#if 0
+	fprintf(stderr, "GIT_INDEX_FILE='%s' git-write-tree\n",
+		getenv("GIT_INDEX_FILE"));
+#endif
+	FILE *fp = popen("git-write-tree 2>/dev/null", "r");
+	char buf[41];
+	unsigned char sha1[20];
+	int ch;
+	unsigned i = 0;
+	while ( (ch = fgetc(fp)) != EOF )
+		if ( i < sizeof(buf)-1 && ch >= '0' && ch <= 'f' )
+			buf[i++] = ch;
+		else
+			break;
+	int rc = pclose(fp);
+	if ( rc == -1 || WEXITSTATUS(rc) )
+		return NULL;
+	buf[i] = '\0';
+	if ( get_sha1(buf, sha1) != 0 )
+		return NULL;
+	return lookup_tree(sha1);
+}
+
+const char *node_title(struct node *node, int *len)
+{
+	const char *s = "(null commit)";
+	*len = strlen(s);
+
+	if ( node->virtual ) {
+		s = node->comment;
+		*len = strlen(s);
+	} else if ( node->commit ) {
+		if ( parse_commit(node->commit) != 0 ) {
+			s = "(bad commit)";
+			*len = strlen(s);
+		} else {
+			s = node->commit->buffer;
+			char prev = '\0';
+			while ( *s ) {
+				if ( '\n' == prev && '\n' == *s ) {
+					++s;
+					break;
+				}
+				prev = *s++;
+			}
+			*len = 0;
+			while ( s[*len] && '\n' != s[*len] )
+				++(*len);
+		}
+	}
+	return s;
+}
+
+const unsigned char *node_sha(const struct node *node)
+{
+	return node->commit->object.sha1;
+}
+
+const char *node_hex_sha1(const struct node *node)
+{
+	return node->commit ?  sha1_to_hex(node->commit->object.sha1):
+		node->virtual ? "virtual": "undefined";
+}
+
+static int sha_eq(const unsigned char *a, const unsigned char *b)
+{
+	if ( !a && !b )
+		return 2;
+	return a && b && memcmp(a, b, 20) == 0;
+}
+
+static struct node_list *node_list_pool = NULL;
+static unsigned node_list_pool_size = 0;
+
+unsigned node_list_count(const struct node_list *l)
+{
+	unsigned c = 0;
+	while ( l ) {
+		++c;
+		l = l->next;
+	}
+	return c;
+}
+
+struct node_list *node_list_insert(struct node *node, struct node_list **list)
+{
+	struct node_list *e;
+	if ( node_list_pool )
+		e = node_list_shift(&node_list_pool);
+	else
+		e = malloc(sizeof(struct node_list));
+	e->next = *list;
+	e->node = node;
+	*list = e;
+	return e;
+}
+
+struct node_list *node_list_free1(struct node_list *list, int free_nodes)
+{
+	struct node_list *next = list->next;
+	if ( free_nodes )
+		free(list->node);
+
+	if ( node_list_pool_size < 1000 ) {
+		list->node = NULL;
+		list->next = node_list_pool;
+		node_list_pool = list;
+	} else
+		free(list);
+	return next;
+}
+
+void node_list_free(struct node_list **list, int free_nodes)
+{
+	while ( *list )
+		node_list_free1(node_list_shift(list), free_nodes);
+}
+
+struct node *node_alloc(struct commit *commit)
+{
+	struct node *node = malloc(sizeof(struct node));
+	node->parents = NULL;
+	node->parents_count = 0;
+	node->children = NULL;
+	node->virtual = 0;
+	node->commit = commit;
+	if ( parse_commit(commit) == 0 )
+		node->tree = commit->tree;
+	else
+		die("failed to parse commit %s", sha1_to_hex(commit->object.sha1));
+
+	return node;
+}
+
+struct node *node_alloc_virtual(struct tree *tree, const char *comment)
+{
+	struct node *node = malloc(sizeof(struct node));
+	node->parents = NULL;
+	node->children = NULL;
+	node->commit = NULL;
+	node->tree = tree;
+	node->virtual = 1;
+	static unsigned virtual_id = 0;
+	node->virtual_id = virtual_id++;
+	node->comment = comment;
+	return node;
+}
+
+void node_set_parents(struct node *node, struct node_list *parents)
+{
+	struct node_list *p;
+	node->parents_count = 0;
+	for_each_node_list(p, node->parents = parents) {
+		node_list_insert(node, &p->node->children);
+		node->parents_count++;
+	}
+}
+
+static inline int node_eq(const struct node *a, const struct node *b)
+{
+	if ( a == b )
+		return 1;
+	if ( a->virtual != b->virtual )
+		return 0;
+	if ( a->virtual )
+		return a->virtual_id == b->virtual_id;
+	if ( a->commit == b->commit )
+		return 1;
+	return sha_eq(a->commit->object.sha1, b->commit->object.sha1);
+}
+
+struct node_list *node_list_find_node(const struct node *node,
+				      const struct node_list *list)
+{
+	const struct node_list *p;
+	for_each_node_list(p, list)
+		if ( node_eq(p->node, node) )
+			break;
+	return (struct node_list*)p;
+}
+
+// a & b. a and are invalid after the call,
+// the result will contain all the common nodes
+struct node_list *node_list_intersect(struct node_list *a,
+				      struct node_list *b)
+{
+	struct node_list *result = NULL;
+	struct node_list *p = a;
+	while ( p ) {
+		struct node_list *next = p->next;
+		struct node_list *bn = node_list_find_node(p->node, b);
+		if ( bn ) {
+			p->next = result;
+			result = p;
+		} else
+			node_list_free1(p, 0);
+		p = next;
+	}
+	node_list_free(&b, 0);
+	return result;
+}
+
+struct node *graph_add_node(struct graph *graph, struct node *node)
+{
+	struct node_list **bucket;
+	if ( node->virtual )
+		// virtual nodes hashed by lowest byte of virtual_id
+		bucket = graph->commits + (node->virtual_id & 0xff);
+	else
+		bucket = graph->commits + node->commit->object.sha1[0];
+	node_list_insert(node, bucket);
+	return node;
+}
+
+struct node *graph_node_bysha(const struct graph *graph,
+			      const unsigned char *sha)
+{
+	const struct node_list *head = *(graph->commits + sha[0]);
+	while ( head ) {
+		if ( !head->node->virtual &&
+		     sha_eq(head->node->commit->object.sha1, sha) ) {
+			return head->node;
+		}
+		head = head->next;
+	}
+	return NULL;
+}
+
+#if 0
+void node_list_print(const char *msg, const struct node_list *list)
+{
+    const struct node_list *p;
+    printf("%s\n", msg);
+    for_each_node_list(p, list) {
+	int len;
+	const char *msg = node_title(p->node, &len);
+	printf("\t%s %.*s\n", node_hex_sha1(p->node), len, msg);
+    }
+}
+#endif
+
+// vim: sw=8 noet
diff --git a/graph.h b/graph.h
new file mode 100644
index 0000000..3b8ca5c
--- /dev/null
+++ b/graph.h
@@ -0,0 +1,80 @@
+#ifndef _GRAPH_H_
+#define _GRAPH_H_
+
+struct node;
+struct graph;
+struct commit;
+struct tree;
+struct commit_list;
+
+struct node_list
+{
+	struct node_list *next;
+	struct node *node;
+};
+
+struct node
+{
+	struct commit *commit;
+	struct tree *tree;
+
+	unsigned parents_count;
+	struct node_list *parents;
+	struct node_list *children;
+
+	unsigned virtual:1;
+	unsigned virtual_id;
+	const char *comment;
+};
+
+struct node_list *node_list_insert(struct node *node, struct node_list **list);
+void node_list_free(struct node_list **list, int free_nodes);
+struct node_list *node_list_free1(struct node_list *list, int free_node);
+
+static inline struct node_list *node_list_shift(struct node_list **list)
+{
+	struct node_list *head = *list;
+	*list = head->next;
+	return head;
+}
+
+static inline struct node *node_list_shift_node(struct node_list **list)
+{
+	struct node *node = (*list)->node;
+	*list = node_list_free1(*list, 0);
+	return node;
+}
+
+struct node *node_alloc(struct commit *);
+struct node *node_alloc_virtual(struct tree *, const char *comment);
+
+void node_set_parents(struct node *node, struct node_list *parents);
+unsigned node_list_count(const struct node_list *);
+
+struct node_list *node_list_find_node(const struct node *node,
+				      const struct node_list *list);
+
+
+struct graph
+{
+	// hashed by first byte of SHA-1 or low byte of virtual_id
+	struct node_list *commits[256];
+};
+
+struct node *graph_add_node(struct graph *graph, struct node *node);
+struct node *graph_node_bysha(const struct graph *graph,
+			      const unsigned char *sha);
+
+#define for_each_node_list(p,list) \
+	for ( p = (list); p; p = p->next )
+
+const char *node_title(struct node *, int *len);
+const unsigned char *node_sha(const struct node *);
+const char *node_hex_sha1(const struct node *);
+void node_list_print(const char *msg, const struct node_list *list);
+
+struct tree *git_write_tree();
+
+#endif /* _GRAPH_H_ */
+
+// vim: sw=8 noet
diff --git a/merge-recursive.c b/merge-recursive.c
new file mode 100644
index 0000000..9bbb426
--- /dev/null
+++ b/merge-recursive.c
@@ -0,0 +1,1576 @@
+//
+// Recursive Merge algorithm stolen from git-merge-recursive.py by
+// Fredrik Kuivinen.
+//
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "cache.h"
+#include "commit.h"
+#include "blob.h"
+#include "tree-walk.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "run-command.h"
+
+#include "graph.h"
+#include "path-list.h"
+
+#define for_each_commit(p,list) for ( p = (list); p; p = p->next )
+
+struct merge_result
+{
+	struct node *commit;
+	unsigned clean:1;
+};
+
+struct merge_tree_result
+{
+	struct tree *tree;
+	unsigned clean:1;
+};
+
+struct index_entry
+{
+	struct index_entry *next;
+	struct
+	{
+		unsigned mode;
+		unsigned char sha[20];
+	} stages[4];
+	unsigned processed:1;
+	char path[1];
+};
+
+struct index_entry *index_entry_alloc(const char *path)
+{
+	size_t n = strlen(path); // index_entry::path has room for \0
+	struct index_entry *p = xmalloc(sizeof(struct index_entry) + n);
+	if ( !p )
+		return NULL;
+	memcpy(p->path, path, n + 1);
+	p->next = NULL;
+	p->processed = 0;
+	return p;
+}
+
+#if 0
+static
+void print_index_entry(const char *text, const struct index_entry *e)
+{
+    printf("%s%s next: %p %s\n", text,
+	   e ? e->path: NULL,
+	   e ? e->next: NULL,
+	   e && e->processed ? "processed": "");
+    if ( e ) {
+	int i;
+	for ( i = 1; i < 4; ++i )
+	    printf("\tstage[%d]: %06o %s\n", i,
+		   e->stages[i].mode,
+		   sha1_to_hex(e->stages[i].sha));
+    }
+}
+#endif
+
+static struct path_list currentFileSet = {NULL, 0, 0};
+static struct path_list currentDirectorySet = {NULL, 0, 0};
+
+static int output_indent = 0;
+
+static void output(const char *fmt, ...)
+{
+	va_list args;
+	int i;
+	for ( i = output_indent; i--; )
+		fputs("  ", stdout);
+	va_start(args, fmt);
+	vfprintf(stdout, fmt, args);
+	va_end(args);
+	fputc('\n', stdout);
+}
+
+static const char *original_index_file;
+static const char *temporary_index_file;
+
+static void setup_index(int temp)
+{
+	const char *idx = temp ? temporary_index_file: original_index_file;
+	unlink(temporary_index_file);
+	setenv("GIT_INDEX_FILE", idx, 1);
+}
+
+// This is a global variable which is used in a number of places but
+// only written to in the 'merge' function.
+
+// index_only == 1    => Don't leave any non-stage 0 entries in the cache and
+//                       don't update the working directory.
+//               0    => Leave unmerged entries in the cache and update
+//                       the working directory.
+static int index_only = 0; // cacheOnly
+
+static int git_read_tree(const struct tree *tree)
+{
+#if 0
+	fprintf(stderr, "GIT_INDEX_FILE='%s' git-read-tree %s\n",
+		getenv("GIT_INDEX_FILE"),
+		sha1_to_hex(tree->object.sha1));
+#endif
+	const char *argv[] = { "git-read-tree", NULL, NULL, };
+	argv[1] = sha1_to_hex(tree->object.sha1);
+	int rc = run_command_v(2, argv);
+	return rc < 0 ? -1: rc;
+}
+
+static int git_merge_trees(const char *update_arg,
+			   struct tree *common,
+			   struct tree *head,
+			   struct tree *merge)
+{
+#if 0
+	fprintf(stderr, "GIT_INDEX_FILE='%s' git-read-tree %s -m %s %s %s\n",
+		getenv("GIT_INDEX_FILE"),
+		update_arg,
+		sha1_to_hex(common->object.sha1),
+		sha1_to_hex(head->object.sha1),
+		sha1_to_hex(merge->object.sha1));
+#endif
+	const char *argv[] = {
+		"git-read-tree", NULL, "-m", NULL, NULL, NULL,
+		NULL,
+	};
+	argv[1] = update_arg;
+	argv[3] = sha1_to_hex(common->object.sha1);
+	argv[4] = sha1_to_hex(head->object.sha1);
+	argv[5] = sha1_to_hex(merge->object.sha1);
+	int rc = run_command_v(6, argv);
+	return rc < 0 ? -1: rc;
+}
+
+struct merge_tree_result merge_trees(struct tree *head,
+				     struct tree *merge,
+				     struct tree *common,
+				     const char *branch1Name,
+				     const char *branch2Name);
+
+static int fget_sha1(unsigned char *sha, FILE *fp, int *ch);
+
+// The entry point to the merge code
+
+// Merge the commits h1 and h2, return the resulting virtual
+// commit object and a flag indicating the cleaness of the merge.
+static
+struct merge_result merge(struct node *h1,
+			  struct node *h2,
+			  const char *branch1Name,
+			  const char *branch2Name,
+			  struct graph *graph,
+			  int callDepth /* =0 */,
+			  struct node *ancestor /* =None */)
+{
+	struct merge_result result = { NULL, 0 };
+
+	const char *msg;
+	int msglen;
+	output("Merging:");
+	msg = node_title(h1, &msglen);
+	output("%s %.*s", node_hex_sha1(h1), msglen, msg);
+	msg = node_title(h2, &msglen);
+	output("%s %.*s", node_hex_sha1(h2), msglen, msg);
+	if ( !ancestor && !graph )
+		die("graph is not initialized");
+	struct node_list *ca = NULL;
+	if ( ancestor )
+		node_list_insert(ancestor, &ca);
+	else {
+		struct node_list **pca = &ca;
+		char cmd[100];
+		sprintf(cmd, "git-merge-base --all %s %s",
+			node_hex_sha1(h1),
+			node_hex_sha1(h2));
+		FILE *fp = popen(cmd, "r");
+		while (!feof(fp)) {
+			unsigned char sha1[20];
+			int ch;
+			if (fget_sha1(sha1, fp, &ch) == 0) {
+				struct node *n;
+				n = node_alloc(lookup_commit(sha1));
+				node_list_insert(n, pca);
+				pca = &(*pca)->next;
+			}
+		}
+		pclose(fp);
+	}
+
+	output("found %u common ancestor(s):", node_list_count(ca));
+	struct node_list *x;
+	for_each_node_list(x,ca) {
+		msg = node_title(x->node, &msglen);
+		output("%s %.*s", node_hex_sha1(x->node), msglen, msg);
+	}
+
+	struct node *mergedCA = node_list_shift_node(&ca);
+
+	struct node_list *h;
+	for_each_commit(h,ca) {
+		output_indent = callDepth + 1;
+		result = merge(mergedCA, h->node,
+			       "Temporary merge branch 1",
+			       "Temporary merge branch 2",
+			       graph,
+			       callDepth + 1,
+			       NULL);
+		mergedCA = result.commit;
+		output_indent = callDepth;
+
+		if ( !mergedCA )
+			die("merge returned no commit");
+	}
+
+	if ( callDepth == 0 ) {
+		setup_index(0);
+		index_only = 0;
+	} else {
+		setup_index(1);
+		git_read_tree(h1->tree);
+		index_only = 1;
+	}
+
+	struct merge_tree_result mtr;
+	mtr = merge_trees(h1->tree, h2->tree,
+			  mergedCA->tree, branch1Name, branch2Name);
+
+	if ( !ancestor && (mtr.clean || index_only) ) {
+		result.commit = node_alloc_virtual(mtr.tree, "merged tree");
+		struct node_list *parents = NULL;
+		node_list_insert(h1, &parents);
+		node_list_insert(h2, &parents->next);
+		node_set_parents(result.commit, parents);
+		graph_add_node(graph, result.commit);
+	} else
+		result.commit = NULL;
+
+	result.clean = mtr.clean;
+	return result;
+}
+
+#define READ_TREE_FOUND 2
+typedef int (*read_tree_rt_fn_t)(const char *sha1,
+				 const char *path,
+				 unsigned mode,
+				 void *data);
+
+// git-ls-tree -r -t <tree>
+static int read_tree_rt(struct tree *tree,
+			const char *base,
+			int baselen,
+			read_tree_rt_fn_t fn,
+			void *data)
+{
+	struct tree_desc desc;
+	struct name_entry entry;
+
+	if (parse_tree(tree))
+		return -1;
+
+	desc.buf = tree->buffer;
+	desc.size = tree->size;
+
+	while ( tree_entry(&desc, &entry) ) {
+		int retval;
+		char *path = xmalloc(baselen + entry.pathlen + 2);
+		memcpy(path, base, baselen);
+		memcpy(path + baselen, entry.path, entry.pathlen);
+		path[baselen + entry.pathlen] = '\0';
+
+		switch ( retval = fn(entry.sha1, path, entry.mode, data) ) {
+		case READ_TREE_RECURSIVE:
+			break;
+		case 0:
+			free(path);
+			continue;
+		default:
+			free(path);
+			return retval;
+		}
+		if (S_ISDIR(entry.mode)) {
+			path[baselen + entry.pathlen] = '/';
+			path[baselen + entry.pathlen + 1] = '\0';
+			retval = read_tree_rt(lookup_tree(entry.sha1),
+					      path,
+					      baselen + entry.pathlen + 1,
+					      fn, data);
+			path[baselen + entry.pathlen] = '\0';
+			if (retval) {
+				free(path);
+				return retval;
+			}
+		}
+		free(path);
+	}
+	return 0;
+}
+
+struct files_and_dirs
+{
+	struct path_list *files;
+	struct path_list *dirs;
+};
+
+static int save_files_dirs(const char *sha1,
+			   const char *path_,
+			   unsigned mode,
+			   void *data_)
+{
+	struct files_and_dirs *data = data_;
+	char *path = strdup(path_);
+
+	if (S_ISDIR(mode))
+		path_list_insert(path, data->dirs);
+	else
+		path_list_insert(path, data->files);
+	return READ_TREE_RECURSIVE;
+}
+
+int getFilesAndDirs(struct tree *tree,
+		    struct path_list *files,
+		    struct path_list *dirs)
+{
+	struct files_and_dirs data;
+	path_list_clear(files, 1);
+	path_list_clear(dirs, 1);
+	data.files = files;
+	data.dirs = dirs;
+	if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 )
+		return 0;
+	return path_list_count(files) + path_list_count(dirs);
+}
+
+struct index_entry *index_entry_find(struct index_entry *ents, const char *path)
+{
+	struct index_entry *e;
+	for ( e = ents; e; e = e->next )
+		if ( strcmp(e->path, path) == 0 )
+			break;
+	return e;
+}
+
+struct index_entry *index_entry_get(struct index_entry **ents, const char *path)
+{
+	struct index_entry *e, **tail = ents;
+	for ( e = *ents; e; e = e->next ) {
+		if ( strcmp(e->path, path) == 0 )
+			return e;
+		tail = &e->next;
+	}
+	e = index_entry_alloc(path);
+	memset(e->stages, 0, sizeof(e->stages));
+	return *tail = e;
+}
+
+struct find_entry
+{
+	const char *path;
+	unsigned char *sha;
+	unsigned *mode;
+};
+
+static int find_entry(const char *sha,
+		      const char *path,
+		      unsigned mode,
+		      void *data_)
+{
+	struct find_entry *data = data_;
+	if ( strcmp(path, data->path) == 0 ) {
+		memcpy(data->sha, sha, 20);
+		*data->mode = mode;
+		return READ_TREE_FOUND;
+	}
+	return READ_TREE_RECURSIVE;
+}
+
+// Returns a CacheEntry object which doesn't have to correspond to
+// a real cache entry in Git's index.
+struct index_entry *index_entry_from_db(const char *path,
+					struct tree *o,
+					struct tree *a,
+					struct tree *b)
+{
+	struct index_entry *e = index_entry_alloc(path);
+	struct find_entry data;
+	data.path = path;
+	data.sha = e->stages[1].sha;
+	data.mode = &e->stages[1].mode;
+	if ( read_tree_rt(o, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
+		// fprintf(stderr, "1: %s:%s not found\n",
+		// 	sha1_to_hex(o->object.sha1), path);
+		memcpy(e->stages[1].sha, null_sha1, 20);
+		e->stages[1].mode = 0;
+	}
+	data.sha = e->stages[2].sha;
+	data.mode = &e->stages[2].mode;
+	if ( read_tree_rt(a, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
+		// fprintf(stderr, "2: %s:%s not found\n",
+		// 	sha1_to_hex(a->object.sha1), path);
+		memcpy(e->stages[2].sha, null_sha1, 20);
+		e->stages[2].mode = 0;
+	}
+	data.sha = e->stages[3].sha;
+	data.mode = &e->stages[3].mode;
+	if ( read_tree_rt(b, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
+		// fprintf(stderr, "3: %s:%s not found\n",
+		// 	sha1_to_hex(b->object.sha1), path);
+		memcpy(e->stages[3].sha, null_sha1, 20);
+		e->stages[3].mode = 0;
+	}
+	return e;
+}
+
+void free_index_entries(struct index_entry **ents)
+{
+	while ( *ents ) {
+		struct index_entry *next = (*ents)->next;
+		free(*ents);
+		*ents = next;
+	}
+}
+
+static int fget_mode(unsigned *mode, FILE *fp, int *ch)
+{
+	int p;
+	char buf[8];
+	for ( p = 0; (*ch = fgetc(fp)) != EOF && p < 6; ) {
+		if ( *ch == '\x20' || *ch == '\t' || *ch == '\n' || *ch == '\r' )
+			break;
+		if ( *ch < '0' || *ch > '7' )
+			return -1;
+		buf[p++] = *ch;
+	}
+	buf[p] = '\0';
+	*mode = strtoul(buf, 0, 8);
+	return 0;
+}
+
+static int fget_sha1(unsigned char *sha, FILE *fp, int *ch)
+{
+	char buf[40];
+	int p;
+	for ( p = 0; (*ch = fgetc(fp)) != EOF && p < 40; ) {
+		if ( ('0' <= *ch && *ch <= '9') ||
+		     ('a' <= *ch && *ch <= 'f') ||
+		     ('A' <= *ch && *ch <= 'F') )
+			buf[p++] = *ch;
+		else
+			return -1;
+	}
+	if ( p != 40 || get_sha1_hex(buf, sha) == -1 )
+		return -1;
+	return 0;
+}
+// Create a dictionary mapping file names to CacheEntry objects. The
+// dictionary contains one entry for every path with a non-zero stage entry.
+struct index_entry *unmergedCacheEntries()
+{
+	struct index_entry *unmerged = NULL;
+	FILE *fp = popen("git-ls-files -z --unmerged", "r");
+	if ( !fp )
+		return NULL;
+	int ch;
+	while ( !feof(fp) ) {
+		unsigned mode;
+		unsigned char sha[20];
+		char stage = '0';
+		char path[PATH_MAX];
+		int p;
+		// mode
+		if ( fget_mode(&mode, fp, &ch) )
+			goto wait_eol;
+		if ( '\x20' != ch )
+			goto wait_eol;
+		// SHA1
+		if ( fget_sha1(sha, fp, &ch) )
+			goto wait_eol;
+		if ( '\x20' != ch )
+			goto wait_eol;
+		// stage
+		if ( (ch = fgetc(fp)) != EOF ) {
+			stage = ch;
+			if ( ch < '1' || ch > '3' )
+				goto wait_eol;
+		}
+		if ( (ch = fgetc(fp)) == EOF || '\t' != ch )
+			goto wait_eol;
+		// path
+		for ( p = 0; (ch = fgetc(fp)) != EOF; ++p ) {
+			path[p] = ch;
+			if ( !ch )
+				break;
+			if ( p == sizeof(path) - 1 ) {
+				path[p] = '\0';
+				error("path too long: %s", path);
+				goto wait_eol;
+			}
+		}
+		if ( ch )
+			goto wait_eol;
+		// printf("unmerged %08o %s %c %s\n",mode,sha1_to_hex(sha),stage,path);
+		struct index_entry *e = index_entry_get(&unmerged, path);
+		e->stages[stage - '1' + 1].mode = mode;
+		memcpy(e->stages[stage - '1' + 1].sha, sha, 20);
+		continue;
+	wait_eol:
+		while ( (ch = fgetc(fp)) != EOF && ch );
+	}
+	pclose(fp);
+	return unmerged;
+}
+
+struct rename_entry
+{
+	struct rename_entry *next;
+
+	char *src;
+	unsigned char src_sha[20];
+	unsigned src_mode;
+	struct index_entry *src_entry;
+
+	char *dst;
+	unsigned char dst_sha[20];
+	unsigned dst_mode;
+	struct index_entry *dst_entry;
+
+	unsigned score;
+	unsigned processed:1;
+};
+
+struct rename_entry *find_rename_bysrc(struct rename_entry *e,
+				       const char *name)
+{
+	while ( e ) {
+		if ( strcmp(e->src, name) == 0 )
+			break;
+		e = e->next;
+	}
+	return e;
+}
+
+struct rename_entry *find_rename_bydst(struct rename_entry *e,
+				       const char *name)
+{
+	while ( e ) {
+		if ( strcmp(e->dst, name) == 0 )
+			break;
+		e = e->next;
+	}
+	return e;
+}
+
+void rename_entry_free(struct rename_entry *p)
+{
+	free(p->src);
+	free(p->dst);
+	free(p);
+}
+
+void free_rename_entries(struct rename_entry **list)
+{
+	while ( *list ) {
+		struct rename_entry *next = (*list)->next;
+		rename_entry_free(*list);
+		*list = next;
+	}
+}
+
+// Get information of all renames which occured between 'oTree' and
+// 'tree'. We need the three trees in the merge ('oTree', 'aTree' and
+// 'bTree') to be able to associate the correct cache entries with
+// the rename information. 'tree' is always equal to either aTree or bTree.
+struct rename_entry *getRenames(struct tree *tree,
+				struct tree *oTree,
+				struct tree *aTree,
+				struct tree *bTree,
+				struct index_entry **entries)
+{
+	struct rename_entry *renames = NULL;
+	struct rename_entry **rptr = &renames;
+	struct diff_options opts;
+	diff_setup(&opts);
+	opts.recursive = 1;
+	opts.detect_rename = DIFF_DETECT_RENAME;
+	opts.output_format = DIFF_FORMAT_NO_OUTPUT;
+	if (diff_setup_done(&opts) < 0)
+		die("diff setup failed");
+	diff_tree_sha1(oTree->object.sha1, tree->object.sha1, "", &opts);
+	diffcore_std(&opts);
+	int i;
+	for (i = 0; i < diff_queued_diff.nr; ++i) {
+		struct rename_entry *re;
+		struct diff_filepair *pair = diff_queued_diff.queue[i];
+		if (pair->status != 'R')
+			continue;
+		re = xmalloc(sizeof(*re));
+		re->next = NULL;
+		re->processed = 0;
+		re->score = pair->score;
+		memcpy(re->src_sha, pair->one->sha1, 20);
+		re->src = strdup(pair->one->path);
+		re->src_mode = pair->one->mode;
+		memcpy(re->dst_sha, pair->two->sha1, 20);
+		re->dst = strdup(pair->two->path);
+		re->dst_mode = pair->two->mode;
+		re->src_entry = index_entry_find(*entries, re->src);
+		if ( !re->src_entry ) {
+			re->src_entry = index_entry_from_db(re->src, oTree, aTree, bTree);
+			re->src_entry->next = *entries;
+			*entries = re->src_entry;
+		}
+		re->dst_entry = index_entry_find(*entries, re->dst);
+		if ( !re->dst_entry ) {
+			re->dst_entry = index_entry_from_db(re->dst, oTree, aTree, bTree);
+			re->dst_entry->next = *entries;
+			*entries = re->dst_entry;
+		}
+		*rptr = re;
+		rptr = &re->next;
+	}
+	opts.output_format = DIFF_FORMAT_NO_OUTPUT;
+	diff_flush(&opts);
+	return renames;
+}
+
+static FILE *git_update_index_pipe()
+{
+	return popen("git-update-index -z --index-info", "w");
+}
+
+int setIndexStages(FILE *fp,
+		   const char *path,
+		   unsigned char *osha, unsigned omode,
+		   unsigned char *asha, unsigned amode,
+		   unsigned char *bsha, unsigned bmode,
+		   int clear /* =True */)
+{
+	if ( !fp )
+		return -1;
+	if ( clear ) {
+		fprintf(fp, "0 %s\t%s", sha1_to_hex(null_sha1), path);
+		fputc('\0', fp);
+	}
+	if ( omode ) {
+		fprintf(fp, "0%o %s 1\t%s", omode, sha1_to_hex(osha), path);
+		fputc('\0', fp);
+	}
+	if ( amode ) {
+		fprintf(fp, "0%o %s 2\t%s", amode, sha1_to_hex(asha), path);
+		fputc('\0', fp);
+	}
+	if ( bmode ) {
+		fprintf(fp, "0%o %s 3\t%s", bmode, sha1_to_hex(bsha), path);
+		fputc('\0', fp);
+	}
+	return 0;
+}
+
+static int remove_path(const char *name)
+{
+	int ret;
+	char *slash;
+
+	ret = unlink(name);
+	if ( ret )
+		return ret;
+	int len = strlen(name);
+	char *dirs = malloc(len+1);
+	memcpy(dirs, name, len);
+	dirs[len] = '\0';
+	while ( (slash = strrchr(name, '/')) ) {
+		*slash = '\0';
+		len = slash - name;
+		if ( rmdir(name) != 0 )
+			break;
+	}
+	free(dirs);
+	return ret;
+}
+
+int removeFile(FILE *fp, int clean, const char *path)
+{
+	int updateCache = index_only || clean;
+	int updateWd = !index_only;
+
+	if ( updateCache ) {
+		if ( !fp )
+			return -1;
+		fprintf(fp, "0 %s\t%s", sha1_to_hex(null_sha1), path);
+		fputc('\0', fp);
+		return 0;
+	}
+	if ( updateWd )
+	{
+		unlink(path);
+		if ( errno != ENOENT || errno != EISDIR )
+			return -1;
+		remove_path(path);
+	}
+	return 0;
+}
+
+char *uniquePath(const char *path, const char *branch)
+{
+	char *newpath = xmalloc(strlen(path) + 1 + strlen(branch) + 8 + 1);
+	strcpy(newpath, path);
+	strcat(newpath, "~");
+	char *p = newpath + strlen(newpath);
+	strcpy(p, branch);
+	for ( ; *p; ++p )
+		if ( '/' == *p )
+			*p = '_';
+	int suffix = 0;
+	struct stat st;
+	while ( path_list_has_path(&currentFileSet, newpath) ||
+		path_list_has_path(&currentDirectorySet, newpath) ||
+		lstat(newpath, &st) == 0 ) {
+		sprintf(p, "_%d", suffix++);
+	}
+	path_list_insert(newpath, &currentFileSet);
+	return newpath;
+}
+
+int mkdir_p(const char *path, unsigned long mode, int create_last)
+{
+	char *buf = strdup(path);
+	char *p;
+
+	for ( p = buf; *p; ++p ) {
+		if ( *p != '/' )
+			continue;
+		*p = '\0';
+		if (mkdir(buf, mode)) {
+			int e = errno;
+			if ( e == EEXIST ) {
+				struct stat st;
+				if ( !stat(buf, &st) && S_ISDIR(st.st_mode) )
+					goto next; /* ok */
+				errno = e;
+			}
+			free(buf);
+			return -1;
+		}
+	next:
+		*p = '/';
+	}
+	free(buf);
+	if ( create_last && mkdir(path, mode) )
+		return -1;
+	return 0;
+}
+
+/* stolen from builtin-cat-file.c */
+static void flush_buffer(int fd, const char *buf, unsigned long size)
+{
+	while (size > 0) {
+		long ret = xwrite(fd, buf, size);
+		if (ret < 0) {
+			/* Ignore epipe */
+			if (errno == EPIPE)
+				break;
+			die("git-cat-file: %s", strerror(errno));
+		} else if (!ret) {
+			die("git-cat-file: disk full?");
+		}
+		size -= ret;
+		buf += ret;
+	}
+}
+
+void updateFileExt(FILE *fp,
+		   const unsigned char *sha,
+		   unsigned mode,
+		   const char *path,
+		   int updateCache,
+		   int updateWd)
+{
+	if ( index_only )
+		updateWd = 0;
+
+	if ( updateWd ) {
+		char type[20];
+		void *buf;
+		unsigned long size;
+
+		buf = read_sha1_file(sha, type, &size);
+		if (!buf)
+			die("cannot read object %s '%s'", sha1_to_hex(sha), path);
+		if ( strcmp(type, blob_type) != 0 )
+			die("blob expected for %s '%s'", sha1_to_hex(sha), path);
+
+		if ( S_ISREG(mode) ) {
+			if ( mkdir_p(path, 0777, 0 /* don't create last element */) )
+				die("failed to create path %s: %s", path, strerror(errno));
+			unlink(path);
+			if ( mode & 0100 )
+				mode = 0777;
+			else
+				mode = 0666;
+			int fd = open(path, O_WRONLY | O_TRUNC | O_CREAT, mode);
+			if ( fd < 0 )
+				die("failed to open %s: %s", path, strerror(errno));
+			flush_buffer(fd, buf, size);
+			close(fd);
+		} else if ( S_ISLNK(mode) ) {
+			char *linkTarget = malloc(size + 1);
+			memcpy(linkTarget, buf, size);
+			linkTarget[size] = '\0';
+			mkdir_p(path, 0777, 0);
+			symlink(linkTarget, path);
+		} else
+			die("do not know what to do with %06o %s '%s'",
+			    mode, sha1_to_hex(sha), path);
+	}
+	if ( updateCache )
+	{
+		// XXX just always use "git update-index --index-info"?
+		fprintf(fp, "%06o %s\t%s", mode, sha1_to_hex(sha), path);
+		fputc('\0', fp);
+	}
+}
+
+void updateFile(FILE *fp,
+		int clean,
+		const unsigned char *sha,
+		unsigned mode,
+		const char *path)
+{
+	updateFileExt(fp, sha, mode, path, index_only || clean, !index_only);
+}
+
+// Low level file merging, update and removal
+// ------------------------------------------
+struct merge_file_info
+{
+	unsigned char sha[20];
+	unsigned mode;
+	unsigned clean:1;
+	unsigned merge:1;
+};
+
+static char *git_unpack_file(unsigned char *sha1, char *path)
+{
+	void *buf;
+	char type[20];
+	unsigned long size;
+	int fd;
+
+	buf = read_sha1_file(sha1, type, &size);
+	if (!buf || strcmp(type, blob_type))
+		die("unable to read blob object %s", sha1_to_hex(sha1));
+
+	strcpy(path, ".merge_file_XXXXXX");
+	fd = mkstemp(path);
+	if (fd < 0)
+		die("unable to create temp-file");
+	if (write(fd, buf, size) != size)
+		die("unable to write temp-file");
+	close(fd);
+	return path;
+}
+
+struct merge_file_info
+mergeFile(const char *oPath, unsigned char *oSha, unsigned oMode,
+	  const char *aPath, unsigned char *aSha, unsigned aMode,
+	  const char *bPath, unsigned char *bSha, unsigned bMode,
+	  const char *branch1Name, const char *branch2Name)
+{
+	struct merge_file_info result;
+	result.merge = 0;
+	result.clean = 1;
+
+	if ( (S_IFMT & aMode) != (S_IFMT & bMode) ) {
+		result.clean = 0;
+		if ( S_ISREG(aMode) ) {
+			result.mode = aMode;
+			memcpy(result.sha, aSha, 20);
+		} else {
+			result.mode = bMode;
+			memcpy(result.sha, bSha, 20);
+		}
+	} else {
+		if ( memcmp(aSha, oSha, 20) != 0 && memcmp(bSha, oSha, 20) != 0 )
+			result.merge = 1;
+
+		result.mode = aMode == oMode ? bMode: aMode;
+
+		if ( memcmp(aSha, oSha, 20) == 0 )
+			memcpy(result.sha, bSha, 20);
+		else if ( memcmp(bSha, oSha, 20) == 0 )
+			memcpy(result.sha, aSha, 20);
+		else if ( S_ISREG(aMode) ) {
+
+			int code = 1;
+			char orig[PATH_MAX];
+			char src1[PATH_MAX];
+			char src2[PATH_MAX];
+
+			git_unpack_file(oSha, orig);
+			git_unpack_file(aSha, src1);
+			git_unpack_file(bSha, src2);
+
+			const char *argv[] = {
+				"merge", "-L", NULL, "-L", NULL, "-L", NULL,
+				src1, orig, src2,
+				NULL
+			};
+			char *la, *lb, *lo;
+			argv[2] = la = malloc(strlen(branch1Name) + 2 + strlen(aPath));
+			strcat(strcat(strcpy(la, branch1Name), "/"), aPath);
+			argv[6] = lb = malloc(strlen(branch2Name) + 2 + strlen(bPath));
+			strcat(strcat(strcpy(lb, branch2Name), "/"), bPath);
+			argv[4] = lo = malloc(7 + strlen(oPath));
+			strcat(strcpy(lo, "orig/"), oPath);
+
+#if 0
+			printf("%s %s %s %s %s %s %s %s %s %s\n",
+			       argv[0], argv[1], argv[2], argv[3], argv[4],
+			       argv[5], argv[6], argv[7], argv[8], argv[9]);
+#endif
+			code = run_command_v(10, argv);
+
+			free(la);
+			free(lb);
+			free(lo);
+			if ( code && code < -256 ) {
+				die("Failed to execute 'merge'. merge(1) is used as the "
+				    "file-level merge tool. Is 'merge' in your path?");
+			}
+			char cmd[PATH_MAX];
+			snprintf(cmd, sizeof(cmd), "git-hash-object -t blob -w %s", src1);
+			FILE *fp = popen(cmd, "r");
+			if ( !fp )
+				die("cannot run git-hash-object: %s", strerror(errno));
+			int ch;
+			if ( fget_sha1(result.sha, fp, &ch) )
+				die("invalid output from git-hash-object");
+			pclose(fp);
+
+			unlink(orig);
+			unlink(src1);
+			unlink(src2);
+
+			result.clean = WEXITSTATUS(code) == 0;
+		} else {
+			if ( !(S_ISLNK(aMode) || S_ISLNK(bMode)) )
+				die("cannot merge modes?");
+
+			memcpy(result.sha, aSha, 20);
+
+			if ( memcmp(aSha, bSha, 20) != 0 )
+				result.clean = 0;
+		}
+	}
+
+	return result;
+}
+
+static void memswp(void *p1, void *p2, unsigned n)
+{
+	unsigned char *a = p1, *b = p2;
+	while ( n-- ) {
+		*a ^= *b;
+		*b ^= *a;
+		*a ^= *b;
+		++a;
+		++b;
+	}
+}
+
+int processRenames(struct rename_entry *renamesA,
+		   struct rename_entry *renamesB,
+		   const char *branchNameA,
+		   const char *branchNameB)
+{
+	int cleanMerge = 1;
+	//    printf("process renames %s:%s -> %s:%s\n",
+	//	   branchNameA, renamesA ? renamesA->src: "(none)",
+	//	   branchNameB, renamesB ? renamesB->dst: "(none)");
+
+	struct path_list srcNames = {NULL, 0, 0};
+	const struct rename_entry *sre;
+	char **src;
+
+	for (sre = renamesA; sre; sre = sre->next)
+		path_list_insert(sre->src, &srcNames);
+	for (sre = renamesB; sre; sre = sre->next)
+		path_list_insert(sre->src, &srcNames);
+
+	FILE *fp = git_update_index_pipe();
+	for_each_path(src,&srcNames) {
+		struct rename_entry *renames1, *renames2, *ren1, *ren2;
+		const char *branchName1, *branchName2;
+		ren1 = find_rename_bysrc(renamesA, *src);
+		ren2 = find_rename_bysrc(renamesB, *src);
+		if ( ren1 ) {
+			renames1 = renamesA;
+			renames2 = renamesB;
+			branchName1 = branchNameA;
+			branchName2 = branchNameB;
+		} else {
+			renames1 = renamesB;
+			renames2 = renamesA;
+			branchName1 = branchNameB;
+			branchName2 = branchNameA;
+			struct rename_entry *tmp = ren2;
+			ren2 = ren1;
+			ren1 = tmp;
+		}
+
+		ren1->dst_entry->processed = 1;
+		ren1->src_entry->processed = 1;
+
+		if ( ren1->processed )
+			continue;
+		ren1->processed = 1;
+
+		if ( ren2 ) {
+			// Renamed in 1 and renamed in 2
+			if ( strcmp(ren1->src, ren2->src) != 0 )
+				die("ren1.srcName != ren2.srcName");
+			ren2->dst_entry->processed = 1;
+			ren2->processed = 1;
+			if ( strcmp(ren1->dst, ren2->dst) != 0 ) {
+				output("CONFLICT (rename/rename): "
+				       "Rename %s->%s in branch %s "
+				       "rename %s->%s in %s",
+				       *src, ren1->dst, branchName1,
+				       *src, ren2->dst, branchName2);
+				cleanMerge = 0;
+				char *dstName1 = ren1->dst, *dstName2 = ren2->dst;
+				if ( path_list_has_path(&currentDirectorySet, ren1->dst) ) {
+					dstName1 = uniquePath(ren1->dst, branchName1);
+					output("%s is a directory in %s adding as %s instead",
+					       ren1->dst, branchName2, dstName1);
+					removeFile(fp, 0, ren1->dst);
+				}
+				if ( path_list_has_path(&currentDirectorySet, ren2->dst) ) {
+					dstName2 = uniquePath(ren2->dst, branchName2);
+					output("%s is a directory in %s adding as %s instead",
+					       ren2->dst, branchName1, dstName2);
+					removeFile(fp, 0, ren2->dst);
+				}
+				setIndexStages(fp, dstName1,
+					       NULL, 0,
+					       ren1->dst_sha, ren1->dst_mode,
+					       NULL, 0,
+					       1 /* clear */);
+				setIndexStages(fp, dstName2,
+					       NULL, 0,
+					       NULL, 0,
+					       ren2->dst_sha, ren2->dst_mode,
+					       1 /* clear */);
+			} else {
+				removeFile(fp, 1, ren1->src);
+				struct merge_file_info mfi;
+				mfi = mergeFile(ren1->src, ren1->src_sha, ren1->src_mode,
+						ren1->dst, ren1->dst_sha, ren1->dst_mode,
+						ren2->dst, ren2->dst_sha, ren2->dst_mode,
+						branchName1, branchName2);
+				if ( mfi.merge || !mfi.clean )
+					output("Renaming %s->%s", *src, ren1->dst);
+
+				if ( mfi.merge )
+					output("Auto-merging %s", ren1->dst);
+
+				if ( !mfi.clean ) {
+					output("CONFLICT (content): merge conflict in %s",
+					       ren1->dst);
+					cleanMerge = 0;
+
+					if ( !index_only )
+						setIndexStages(fp,
+							       ren1->dst,
+							       ren1->src_sha, ren1->src_mode,
+							       ren1->dst_sha, ren1->dst_mode,
+							       ren2->dst_sha, ren2->dst_mode,
+							       1 /* clear */);
+				}
+				updateFile(fp, mfi.clean, mfi.sha, mfi.mode, ren1->dst);
+			}
+		} else {
+			// Renamed in 1, maybe changed in 2
+			removeFile(fp, 1, ren1->src);
+
+			unsigned char srcShaOtherBranch[20], dstShaOtherBranch[20];
+			unsigned srcModeOtherBranch, dstModeOtherBranch;
+
+			int stage = renamesA == renames1 ? 3: 2;
+
+			memcpy(srcShaOtherBranch, ren1->src_entry->stages[stage].sha, 20);
+			srcModeOtherBranch = ren1->src_entry->stages[stage].mode;
+
+			memcpy(dstShaOtherBranch, ren1->dst_entry->stages[stage].sha, 20);
+			dstModeOtherBranch = ren1->dst_entry->stages[stage].mode;
+
+			int tryMerge = 0;
+			char *newPath;
+			struct rename_entry *dst2;
+
+			if ( path_list_has_path(&currentDirectorySet, ren1->dst) ) {
+				newPath = uniquePath(ren1->dst, branchName1);
+				output("CONFLICT (rename/directory): Rename %s->%s in %s "
+				       " directory %s added in %s",
+				       ren1->src, ren1->dst, branchName1,
+				       ren1->dst, branchName2);
+				output("Renaming %s to %s instead", ren1->src, newPath);
+				cleanMerge = 0;
+				removeFile(fp, 0, ren1->dst);
+				updateFile(fp, 0, ren1->dst_sha, ren1->dst_mode, newPath);
+			} else if ( memcmp(srcShaOtherBranch, null_sha1, 20) == 0 ) {
+				output("CONFLICT (rename/delete): Rename %s->%s in %s "
+				       "and deleted in %s",
+				       ren1->src, ren1->dst, branchName1,
+				       branchName2);
+				cleanMerge = 0;
+				updateFile(fp, 0, ren1->dst_sha, ren1->dst_mode, ren1->dst);
+			} else if ( memcmp(dstShaOtherBranch, null_sha1, 20) != 0 ) {
+				newPath = uniquePath(ren1->dst, branchName2);
+				output("CONFLICT (rename/add): Rename %s->%s in %s. "
+				       "%s added in %s",
+				       ren1->src, ren1->dst, branchName1,
+				       ren1->dst, branchName2);
+				output("Adding as %s instead", newPath);
+				updateFile(fp, 0, dstShaOtherBranch, dstModeOtherBranch, newPath);
+				cleanMerge = 0;
+				tryMerge = 1;
+			} else if ( (dst2 = find_rename_bydst(renames2, ren1->dst)) ) {
+				char *newPath1 = uniquePath(ren1->dst, branchName1);
+				char *newPath2 = uniquePath(dst2->dst, branchName2);
+				output("CONFLICT (rename/rename): Rename %s->%s in %s. "
+				       "Rename %s->%s in %s",
+				       ren1->src, ren1->dst, branchName1,
+				       dst2->src, dst2->dst, branchName2);
+				output("Renaming %s to %s and %s to %s instead",
+				       ren1->src, newPath1, dst2->src, newPath2);
+				removeFile(fp, 0, ren1->dst);
+				updateFile(fp, 0, ren1->dst_sha, ren1->dst_mode, newPath1);
+				updateFile(fp, 0, dst2->dst_sha, dst2->dst_mode, newPath2);
+				dst2->processed = 1;
+				cleanMerge = 0;
+			} else
+				tryMerge = 1;
+
+			if ( tryMerge ) {
+				char *oname = ren1->src;
+				char *aname = ren1->dst;
+				char *bname = ren1->src;
+				unsigned char osha[20], asha[20], bsha[20];
+				unsigned omode = ren1->src_mode;
+				unsigned amode = ren1->dst_mode;
+				unsigned bmode = srcModeOtherBranch;
+				memcpy(osha, ren1->src_sha, 20);
+				memcpy(asha, ren1->dst_sha, 20);
+				memcpy(bsha, srcShaOtherBranch, 20);
+				const char *aBranch = branchName1;
+				const char *bBranch = branchName2;
+
+				if ( renamesA != renames1 ) {
+					memswp(&aname, &bname, sizeof(aname));
+					memswp(asha, bsha, 20);
+					memswp(&aBranch, &bBranch, sizeof(aBranch));
+				}
+				struct merge_file_info mfi;
+				mfi = mergeFile(oname, osha, omode,
+						aname, asha, amode,
+						bname, bsha, bmode,
+						aBranch, bBranch);
+
+				if ( mfi.merge || !mfi.clean )
+					output("Renaming %s => %s", ren1->src, ren1->dst);
+				if ( mfi.merge )
+					output("Auto-merging %s", ren1->dst);
+				if ( !mfi.clean ) {
+					output("CONFLICT (rename/modify): Merge conflict in %s",
+					       ren1->dst);
+					cleanMerge = 0;
+
+					if ( !index_only )
+						setIndexStages(fp,
+							       ren1->dst,
+							       osha, omode,
+							       asha, amode,
+							       bsha, bmode,
+							       1 /* clear */);
+				}
+				updateFile(fp, mfi.clean, mfi.sha, mfi.mode, ren1->dst);
+			}
+		}
+	}
+	path_list_clear(&srcNames, 0);
+	if (pclose(fp)) {
+		die("git update-index --index-info failed");
+	}
+	return cleanMerge;
+}
+
+static unsigned char *has_sha(const unsigned char *sha)
+{
+	return memcmp(sha, null_sha1, 20) == 0 ? NULL: (unsigned char *)sha;
+}
+
+static int sha_eq(const unsigned char *a, const unsigned char *b)
+{
+	if ( !a && !b )
+		return 2;
+	return a && b && memcmp(a, b, 20) == 0;
+}
+
+// Per entry merge function
+// ------------------------
+// Merge one cache entry.
+int processEntry(struct index_entry *entry,
+		 const char *branch1Name,
+		 const char *branch2Name)
+{
+	//    printf("processing entry, clean cache: %s\n", index_only ? "yes": "no");
+	//    print_index_entry("\tpath: ", entry);
+	int cleanMerge = 1;
+	const char *path = entry->path;
+	unsigned char *oSha = has_sha(entry->stages[1].sha);
+	unsigned char *aSha = has_sha(entry->stages[2].sha);
+	unsigned char *bSha = has_sha(entry->stages[3].sha);
+	unsigned oMode = entry->stages[1].mode;
+	unsigned aMode = entry->stages[2].mode;
+	unsigned bMode = entry->stages[3].mode;
+	FILE *fp = git_update_index_pipe();
+
+	if ( oSha && (!aSha || !bSha) ) {
+		//
+		// Case A: Deleted in one
+		//
+		if ( (!aSha && !bSha) ||
+		     (sha_eq(aSha, oSha) && !bSha) ||
+		     (!aSha && sha_eq(bSha, oSha)) ) {
+			// Deleted in both or deleted in one and unchanged in the other
+			if ( aSha )
+				output("Removing %s", path);
+			removeFile(fp, 1, path);
+		} else {
+			// Deleted in one and changed in the other
+			cleanMerge = 0;
+			if ( !aSha ) {
+				output("CONFLICT (delete/modify): %s deleted in %s "
+				       "and modified in %s. Version %s of %s left in tree.",
+				       path, branch1Name,
+				       branch2Name, branch2Name, path);
+				updateFile(fp, 0, bSha, bMode, path);
+			} else {
+				output("CONFLICT (delete/modify): %s deleted in %s "
+				       "and modified in %s. Version %s of %s left in tree.",
+				       path, branch2Name,
+				       branch1Name, branch1Name, path);
+				updateFile(fp, 0, aSha, aMode, path);
+			}
+		}
+
+	} else if ( (!oSha && aSha && !bSha) ||
+		    (!oSha && !aSha && bSha) ) {
+		//
+		// Case B: Added in one.
+		//
+		const char *addBranch;
+		const char *otherBranch;
+		unsigned mode;
+		const unsigned char *sha;
+		const char *conf;
+
+		if ( aSha ) {
+			addBranch = branch1Name;
+			otherBranch = branch2Name;
+			mode = aMode;
+			sha = aSha;
+			conf = "file/directory";
+		} else {
+			addBranch = branch2Name;
+			otherBranch = branch1Name;
+			mode = bMode;
+			sha = bSha;
+			conf = "directory/file";
+		}
+		if ( path_list_has_path(&currentDirectorySet, path) ) {
+			cleanMerge = 0;
+			const char *newPath = uniquePath(path, addBranch);
+			output("CONFLICT (%s): There is a directory with name %s in %s. "
+			       "Adding %s as %s",
+			       conf, path, otherBranch, path, newPath);
+			removeFile(fp, 0, path);
+			updateFile(fp, 0, sha, mode, newPath);
+		} else {
+			output("Adding %s", path);
+			updateFile(fp, 1, sha, mode, path);
+		}
+	} else if ( !oSha && aSha && bSha ) {
+		//
+		// Case C: Added in both (check for same permissions).
+		//
+		if ( sha_eq(aSha, bSha) ) {
+			if ( aMode != bMode ) {
+				cleanMerge = 0;
+				output("CONFLICT: File %s added identically in both branches, "
+				       "but permissions conflict %06o->%06o",
+				       path, aMode, bMode);
+				output("CONFLICT: adding with permission: %06o", aMode);
+				updateFile(fp, 0, aSha, aMode, path);
+			} else {
+				// This case is handled by git-read-tree
+				assert(0 && "This case must be handled by git-read-tree");
+			}
+		} else {
+			cleanMerge = 0;
+			const char *newPath1 = uniquePath(path, branch1Name);
+			const char *newPath2 = uniquePath(path, branch2Name);
+			output("CONFLICT (add/add): File %s added non-identically "
+			       "in both branches. Adding as %s and %s instead.",
+			       path, newPath1, newPath2);
+			removeFile(fp, 0, path);
+			updateFile(fp, 0, aSha, aMode, newPath1);
+			updateFile(fp, 0, bSha, bMode, newPath2);
+		}
+
+	} else if ( oSha && aSha && bSha ) {
+		//
+		// case D: Modified in both, but differently.
+		//
+		output("Auto-merging %s\n", path);
+		struct merge_file_info mfi;
+		mfi = mergeFile(path, oSha, oMode,
+				path, aSha, aMode,
+				path, bSha, bMode,
+				branch1Name, branch2Name);
+
+		if ( mfi.clean )
+			updateFile(fp, 1, mfi.sha, mfi.mode, path);
+		else {
+			cleanMerge = 0;
+			output("CONFLICT (content): Merge conflict in %s", path);
+
+			if ( index_only )
+				updateFile(fp, 0, mfi.sha, mfi.mode, path);
+			else
+				updateFileExt(fp, mfi.sha, mfi.mode, path,
+					      0 /* updateCache */, 1 /* updateWd */);
+		}
+	} else
+		die("Fatal merge failure, shouldn't happen.");
+
+	if (pclose(fp))
+		die("updating entry failed in git update-index");
+	return cleanMerge;
+}
+
+struct merge_tree_result merge_trees(struct tree *head,
+				     struct tree *merge,
+				     struct tree *common,
+				     const char *branch1Name,
+				     const char *branch2Name)
+{
+	int code;
+	struct merge_tree_result result = { NULL, 0 };
+	if ( !memcmp(common->object.sha1, merge->object.sha1, 20) ) {
+		output("Already uptodate!");
+		result.tree = head;
+		result.clean = 1;
+		return result;
+	}
+
+	code = git_merge_trees(index_only ? "-i": "-u", common, head, merge);
+
+	if ( code != 0 )
+		die("merging of trees %s and %s failed",
+		    sha1_to_hex(head->object.sha1),
+		    sha1_to_hex(merge->object.sha1));
+
+	result.tree = git_write_tree();
+
+	if ( !result.tree ) {
+		struct path_list filesM = {NULL, 0, 0}, dirsM = {NULL, 0, 0};
+
+		getFilesAndDirs(head, &currentFileSet, &currentDirectorySet);
+		getFilesAndDirs(merge, &filesM, &dirsM);
+
+		path_list_union_update(&currentFileSet, &filesM);
+		path_list_union_update(&currentDirectorySet, &dirsM);
+
+		struct index_entry *entries = unmergedCacheEntries();
+		struct rename_entry *renamesHead, *renamesMerge;
+		renamesHead  = getRenames(head, common, head, merge, &entries);
+		renamesMerge = getRenames(merge, common, head, merge, &entries);
+		result.clean = processRenames(renamesHead, renamesMerge,
+					      branch1Name, branch2Name);
+		struct index_entry *e;
+		for ( e = entries; e; e = e->next ) {
+			if ( e->processed )
+				continue;
+			if ( !processEntry(e, branch1Name, branch2Name) )
+				result.clean = 0;
+			if ( result.clean || index_only )
+				result.tree = git_write_tree();
+			else
+				result.tree = NULL;
+		}
+		free_rename_entries(&renamesMerge);
+		free_rename_entries(&renamesHead);
+		free_index_entries(&entries);
+	} else {
+		result.clean = 1;
+		printf("merging of trees %s and %s resulted in %s\n",
+		       sha1_to_hex(head->object.sha1),
+		       sha1_to_hex(merge->object.sha1),
+		       sha1_to_hex(result.tree->object.sha1));
+	}
+
+	return result;
+}
+
+static void collect_nodes(struct node *node, struct node_list **res)
+{
+    node_list_insert(node, res);
+    struct node_list *p;
+    for ( p = node->parents; p; p = p->next )
+	collect_nodes(node, res);
+}
+
+struct node_list *reachable_nodes(struct node *n1, struct node *n2)
+{
+    struct node_list *res = NULL;
+    collect_nodes(n1, &res);
+    collect_nodes(n2, &res);
+    return res;
+}
+
+struct graph *graph_build(struct node_list *commits)
+{
+	struct graph *graph = malloc(sizeof(struct graph));
+	memset(graph->commits, 0, sizeof(graph->commits));
+
+	char cmd[256];
+	strcpy(cmd, "git-rev-list --parents");
+	struct node_list *cp;
+	for_each_node_list(cp,commits) {
+		graph_add_node(graph, cp->node);
+		strcat(cmd, " ");
+		strcat(cmd, node_hex_sha1(cp->node));
+	}
+	assert(strlen(cmd) < sizeof(cmd));
+
+	FILE *fp = popen(cmd, "r");
+	if (!fp)
+		die("%s failed: %s", cmd, strerror(errno));
+	while (!feof(fp)) {
+		unsigned char sha[20];
+		int ch;
+		if (fget_sha1(sha, fp, &ch))
+			break;
+		if (EOF == ch)
+			break;
+		// a commit
+		struct node *node = graph_node_bysha(graph, sha);
+		if (!node)
+		{
+			node = node_alloc(lookup_commit(sha));
+			graph_add_node(graph, node);
+		}
+		// ...and its parents. I assume a parent cannot be mentioned
+		// before the children.
+		struct node_list *parents = NULL;
+		while ('\n' != ch) {
+			if (fget_sha1(sha, fp, &ch)) {
+				die("invalid output from %s, "
+				    "sha1 (parents) expected",
+				    cmd);
+				break;
+			}
+			if (EOF == ch)
+				break;
+			struct node *pn = graph_node_bysha(graph, sha);
+			if (!pn) {
+				pn = node_alloc(lookup_commit(sha));
+				graph_add_node(graph, pn);
+			}
+			node_list_insert(pn, &parents);
+		}
+		node_set_parents(node, parents);
+	}
+	pclose(fp);
+	return graph;
+}
+
+static int get_sha1_0(const char *ref, unsigned char *sha)
+{
+	size_t n = strlen(ref);
+	char *t = xmalloc(n + 4);
+	memcpy(t, ref, n);
+	strcpy(t + n, "^0");
+	int rc = get_sha1(t, sha);
+	free(t);
+	return rc;
+}
+
+int main(int argc, char *argv[])
+{
+	static const char *bases[2];
+	static unsigned bases_count = 0;
+
+	original_index_file = getenv("GIT_INDEX_FILE");
+
+	if (!original_index_file)
+		original_index_file = strdup(git_path("index"));
+
+	temporary_index_file = strdup(git_path("mrg-rcrsv-tmp-idx"));
+
+	if (argc < 4)
+		die("Usage: %s <base>... -- <head> <remote> ...\n", argv[0]);
+
+	int i;
+	for (i = 1; i < argc; ++i) {
+		if (!strcmp(argv[i], "--"))
+			break;
+		if (bases_count < sizeof(bases)/sizeof(*bases))
+			bases[bases_count++] = argv[i];
+	}
+	if (argc - i != 3) /* "--" "<head>" "<remote>" */
+		die("Not handling anything other than two heads merge.");
+
+	unsigned char sha1[20], sha2[20];
+	const char *branch1, *branch2;
+
+	branch1 = argv[++i];
+	if (get_sha1_0(branch1, sha1) != 0)
+		die("invalid first branch %s", branch1);
+
+	branch2 = argv[++i];
+	if (get_sha1_0(branch2, sha2) != 0)
+		die("invalid second branch %s", branch2);
+
+	printf("Merging %s with %s\n", branch1, branch2);
+
+	struct merge_result result;
+	struct node *h1 = node_alloc(lookup_commit(sha1));
+	struct node *h2 = node_alloc(lookup_commit(sha2));
+
+	if (bases_count == 1) {
+		unsigned char shabase[20];
+		if (get_sha1_0(bases[0], shabase) != 0)
+			die("invalid base commit %s", bases[0]);
+		struct node *ancestor = node_alloc(lookup_commit(shabase));
+		result = merge(h1, h2, branch1, branch2, NULL, 0, ancestor);
+	} else {
+		struct node_list *commits = NULL;
+		node_list_insert(h1, &commits);
+		node_list_insert(h2, &commits->next);
+		struct graph *graph = graph_build(commits);
+		result = merge(h1, h2, branch1, branch2, graph, 0, NULL);
+	}
+	return result.clean ? 0: 1;
+}
+
+// vim: sw=8 noet
diff --git a/path-list.c b/path-list.c
new file mode 100644
index 0000000..fbfc103
--- /dev/null
+++ b/path-list.c
@@ -0,0 +1,110 @@
+#include <stdio.h>
+#include "cache.h"
+#include "path-list.h"
+
+/* if there is no exact match, point to the index where the entry could be
+ * inserted */
+static int get_entry_index(const struct path_list *container, const char *path,
+		int *exact_match)
+{
+	int left = -1, right = container->nr;
+
+	while (left + 1 < right) {
+		int middle = (left + right) / 2;
+		int compare = strcmp(path, container->paths[middle]);
+		if (compare < 0)
+			right = middle;
+		else if (compare > 0)
+			left = middle;
+		else {
+			*exact_match = 1;
+			return middle;
+		}
+	}
+
+	*exact_match = 0;
+	return right;
+}
+
+/* returns -1-index if already exists */
+static int add_entry(struct path_list *container, const char *path)
+{
+	int exact_match;
+	int index = get_entry_index(container, path, &exact_match);
+
+	if (exact_match)
+		return -1 - index;
+
+	if (container->nr + 1 >= container->alloc) {
+		container->alloc += 32;
+		container->paths = realloc(container->paths,
+				container->alloc * sizeof(char *));
+	}
+	if (index < container->nr)
+		memmove(container->paths + index + 1,
+				container->paths + index,
+				(container->nr - index) * sizeof(char *));
+	container->paths[index] = strdup(path);
+	container->nr++;
+
+	return index;
+}
+
+char *path_list_insert(char *path, struct path_list *list)
+{
+	int index = add_entry(list, path);
+
+	if (index < 0)
+		index = 1 - index;
+
+	return list->paths[index];
+}
+
+int path_list_has_path(const struct path_list *list, const char *path)
+{
+	int exact_match;
+	get_entry_index(list, path, &exact_match);
+	return exact_match;
+}
+
+// in place
+void path_list_union_update(struct path_list *dst, const struct path_list *src)
+{
+	char **new_paths;
+	int i = 0, j = 0, nr = 0, alloc = dst->nr + dst->nr;
+
+	new_paths = xcalloc(sizeof(char *), alloc);
+
+	while (i < dst->nr || j < src->nr) {
+		char **entry = new_paths + nr++;
+		if (i == dst->nr)
+			*entry = src->paths[j++];
+		else if (j == src->nr)
+			*entry = dst->paths[i++];
+		else {
+			int compare = strcmp(dst->paths[i], src->paths[j]);
+			if (compare > 0)
+				*entry = src->paths[j++];
+			else {
+				*entry = dst->paths[i++];
+				if (!compare)
+					free(src->paths[j++]);
+			}
+		}
+	}
+
+	free(dst->paths);
+	dst->paths = new_paths;
+	dst->nr = nr;
+	dst->alloc = alloc;
+}
+
+void print_path_list(const char *text, const struct path_list *p)
+{
+	int i;
+	if ( text )
+		printf("%s\n", text);
+	for (i = 0; i < p->nr; i++)
+		printf("%s\n", p->paths[i]);
+}
+
diff --git a/path-list.h b/path-list.h
new file mode 100644
index 0000000..a12c7a4
--- /dev/null
+++ b/path-list.h
@@ -0,0 +1,32 @@
+#ifndef _PATH_LIST_H_
+#define _PATH_LIST_H_
+
+struct path_list
+{
+    char **paths;
+    unsigned int nr, alloc;
+};
+
+#define for_each_path(p,list) for ( p = (list)->paths; p != (list)->paths + (list)->nr; p++ )
+
+void print_path_list(const char *text, const struct path_list *p);
+
+#define path_list_count(list) (list)->nr
+
+int path_list_has_path(const struct path_list *list, const char *path);
+void path_list_union_update(struct path_list *dst, const struct path_list *src);
+static inline void path_list_clear(struct path_list *list, int free_paths)
+{
+	if (list->paths) {
+		int i;
+		if (free_paths)
+			for (i = 0; i < list->nr; i++)
+				free(list->paths[i]);
+		free(list->paths);
+	}
+	list->paths = NULL;
+	list->nr = list->alloc = 0;
+}
+char *path_list_insert(char *path, struct path_list *list);
+
+#endif /* _PATH_LIST_H_ */

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-27 22:32   ` Alex Riesen
@ 2006-06-28  9:34     ` Alex Riesen
  2006-06-28 10:04       ` Johannes Schindelin
  2006-06-29  0:25       ` Alex Riesen
  2006-06-29 13:16     ` Johannes Schindelin
                       ` (2 subsequent siblings)
  3 siblings, 2 replies; 32+ messages in thread
From: Alex Riesen @ 2006-06-28  9:34 UTC (permalink / raw)
  To: git; +Cc: Johannes Schindelin, Junio C Hamano, Fredrik Kuivinen,
	Linus Torvalds

[-- Attachment #1: Type: text/plain, Size: 455 bytes --]

On 6/28/06, Alex Riesen <fork0@t-online.de> wrote:
> Johannes Schindelin, Tue, Jun 27, 2006 18:42:51 +0200:
> > > - use a pipe to "git-update-index --index-info" instead of using command
> > > line

...and to take it a step further, a patch (0002) to avoid too many calls to
git-write-tree and to git-update-index. Brought merge times on my test
monster (~25k files) down to 2min 30sec (from something around 11 min).
The 0001 patch is just C++ comments.

[-- Attachment #2: 0001-update-comments-and-remove-C.txt --]
[-- Type: text/plain, Size: 13049 bytes --]

From 36147880afad3a2d53f6474bec069e29d82a74a8 Mon Sep 17 00:00:00 2001
From: Riesen <ARiesen@oekap852.hbi.ad.harman.com>
Date: Wed, 28 Jun 2006 09:46:47 +0200
Subject: update comments and remove C++ //...
---
 graph.c           |   14 ++++--
 merge-recursive.c |  131 +++++++++++++++++++++++++----------------------------
 path-list.c       |    2 -
 3 files changed, 72 insertions(+), 75 deletions(-)

diff --git a/graph.c b/graph.c
index fa2bfee..1a28302 100644
--- a/graph.c
+++ b/graph.c
@@ -5,7 +5,7 @@ #include "cache.h"
 #include "commit.h"
 #include "graph.h"
 
-// does not belong here
+/* does not belong here */
 struct tree *git_write_tree()
 {
 #if 0
@@ -189,8 +189,10 @@ struct node_list *node_list_find_node(co
 	return (struct node_list*)p;
 }
 
-// a & b. a and are invalid after the call,
-// the result will contain all the common nodes
+/*
+ * a & b. a and are invalid after the call,
+ * the result will contain all the common nodes
+ */
 struct node_list *node_list_intersect(struct node_list *a,
 				      struct node_list *b)
 {
@@ -214,7 +216,7 @@ struct node *graph_add_node(struct graph
 {
 	struct node_list **bucket;
 	if ( node->virtual )
-		// virtual nodes hashed by lowest byte of virtual_id
+		/* virtual nodes hashed by lowest byte of virtual_id */
 		bucket = graph->commits + (node->virtual_id & 0xff);
 	else
 		bucket = graph->commits + node->commit->object.sha1[0];
@@ -249,4 +251,6 @@ void node_list_print(const char *msg, co
 }
 #endif
 
-// vim: sw=8 noet
+/*
+vim: sw=8 noet
+*/
diff --git a/merge-recursive.c b/merge-recursive.c
index 9bbb426..ba5b024 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -1,7 +1,7 @@
-//
-// Recursive Merge algorithm stolen from git-merge-recursive.py by
-// Fredrik Kuivinen.
-//
+/*
+ * Recursive Merge algorithm stolen from git-merge-recursive.py by
+ * Fredrik Kuivinen.
+ */
 #include <stdarg.h>
 #include <string.h>
 #include <assert.h>
@@ -47,7 +47,7 @@ struct index_entry
 
 struct index_entry *index_entry_alloc(const char *path)
 {
-	size_t n = strlen(path); // index_entry::path has room for \0
+	size_t n = strlen(path); /* index_entry::path has room for \0 */
 	struct index_entry *p = xmalloc(sizeof(struct index_entry) + n);
 	if ( !p )
 		return NULL;
@@ -102,14 +102,16 @@ static void setup_index(int temp)
 	setenv("GIT_INDEX_FILE", idx, 1);
 }
 
-// This is a global variable which is used in a number of places but
-// only written to in the 'merge' function.
-
-// index_only == 1    => Don't leave any non-stage 0 entries in the cache and
-//                       don't update the working directory.
-//               0    => Leave unmerged entries in the cache and update
-//                       the working directory.
-static int index_only = 0; // cacheOnly
+/*
+ * This is a global variable which is used in a number of places but
+ * only written to in the 'merge' function.
+ *
+ * index_only == 1    => Don't leave any non-stage 0 entries in the cache and
+ *                       don't update the working directory.
+ *               0    => Leave unmerged entries in the cache and update
+ *                       the working directory.
+ */
+static int index_only = 0;
 
 static int git_read_tree(const struct tree *tree)
 {
@@ -157,10 +159,10 @@ struct merge_tree_result merge_trees(str
 
 static int fget_sha1(unsigned char *sha, FILE *fp, int *ch);
 
-// The entry point to the merge code
-
-// Merge the commits h1 and h2, return the resulting virtual
-// commit object and a flag indicating the cleaness of the merge.
+/*
+ * Merge the commits h1 and h2, return the resulting virtual
+ * commit object and a flag indicating the cleaness of the merge.
+ */
 static
 struct merge_result merge(struct node *h1,
 			  struct node *h2,
@@ -262,7 +264,7 @@ typedef int (*read_tree_rt_fn_t)(const c
 				 unsigned mode,
 				 void *data);
 
-// git-ls-tree -r -t <tree>
+/* git-ls-tree -r -t <tree> */
 static int read_tree_rt(struct tree *tree,
 			const char *base,
 			int baselen,
@@ -391,8 +393,10 @@ static int find_entry(const char *sha,
 	return READ_TREE_RECURSIVE;
 }
 
-// Returns a CacheEntry object which doesn't have to correspond to
-// a real cache entry in Git's index.
+/*
+ * Returns a index_entry instance which doesn't have to correspond to
+ * a real cache entry in Git's index.
+ */
 struct index_entry *index_entry_from_db(const char *path,
 					struct tree *o,
 					struct tree *a,
@@ -404,24 +408,18 @@ struct index_entry *index_entry_from_db(
 	data.sha = e->stages[1].sha;
 	data.mode = &e->stages[1].mode;
 	if ( read_tree_rt(o, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
-		// fprintf(stderr, "1: %s:%s not found\n",
-		// 	sha1_to_hex(o->object.sha1), path);
 		memcpy(e->stages[1].sha, null_sha1, 20);
 		e->stages[1].mode = 0;
 	}
 	data.sha = e->stages[2].sha;
 	data.mode = &e->stages[2].mode;
 	if ( read_tree_rt(a, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
-		// fprintf(stderr, "2: %s:%s not found\n",
-		// 	sha1_to_hex(a->object.sha1), path);
 		memcpy(e->stages[2].sha, null_sha1, 20);
 		e->stages[2].mode = 0;
 	}
 	data.sha = e->stages[3].sha;
 	data.mode = &e->stages[3].mode;
 	if ( read_tree_rt(b, "", 0, find_entry, &data) != READ_TREE_FOUND ) {
-		// fprintf(stderr, "3: %s:%s not found\n",
-		// 	sha1_to_hex(b->object.sha1), path);
 		memcpy(e->stages[3].sha, null_sha1, 20);
 		e->stages[3].mode = 0;
 	}
@@ -469,8 +467,11 @@ static int fget_sha1(unsigned char *sha,
 		return -1;
 	return 0;
 }
-// Create a dictionary mapping file names to CacheEntry objects. The
-// dictionary contains one entry for every path with a non-zero stage entry.
+
+/*
+ * Create a dictionary mapping file names to CacheEntry objects. The
+ * dictionary contains one entry for every path with a non-zero stage entry.
+ */
 struct index_entry *unmergedCacheEntries()
 {
 	struct index_entry *unmerged = NULL;
@@ -484,17 +485,17 @@ struct index_entry *unmergedCacheEntries
 		char stage = '0';
 		char path[PATH_MAX];
 		int p;
-		// mode
+		/* mode */
 		if ( fget_mode(&mode, fp, &ch) )
 			goto wait_eol;
 		if ( '\x20' != ch )
 			goto wait_eol;
-		// SHA1
+		/* SHA1 */
 		if ( fget_sha1(sha, fp, &ch) )
 			goto wait_eol;
 		if ( '\x20' != ch )
 			goto wait_eol;
-		// stage
+		/* stage */
 		if ( (ch = fgetc(fp)) != EOF ) {
 			stage = ch;
 			if ( ch < '1' || ch > '3' )
@@ -502,7 +503,7 @@ struct index_entry *unmergedCacheEntries
 		}
 		if ( (ch = fgetc(fp)) == EOF || '\t' != ch )
 			goto wait_eol;
-		// path
+		/* path */
 		for ( p = 0; (ch = fgetc(fp)) != EOF; ++p ) {
 			path[p] = ch;
 			if ( !ch )
@@ -515,7 +516,6 @@ struct index_entry *unmergedCacheEntries
 		}
 		if ( ch )
 			goto wait_eol;
-		// printf("unmerged %08o %s %c %s\n",mode,sha1_to_hex(sha),stage,path);
 		struct index_entry *e = index_entry_get(&unmerged, path);
 		e->stages[stage - '1' + 1].mode = mode;
 		memcpy(e->stages[stage - '1' + 1].sha, sha, 20);
@@ -583,10 +583,12 @@ void free_rename_entries(struct rename_e
 	}
 }
 
-// Get information of all renames which occured between 'oTree' and
-// 'tree'. We need the three trees in the merge ('oTree', 'aTree' and
-// 'bTree') to be able to associate the correct cache entries with
-// the rename information. 'tree' is always equal to either aTree or bTree.
+/*
+ * Get information of all renames which occured between 'oTree' and
+ * 'tree'. We need the three trees in the merge ('oTree', 'aTree' and
+ * 'bTree') to be able to associate the correct cache entries with
+ * the rename information. 'tree' is always equal to either aTree or bTree.
+ */
 struct rename_entry *getRenames(struct tree *tree,
 				struct tree *oTree,
 				struct tree *aTree,
@@ -831,7 +833,6 @@ void updateFileExt(FILE *fp,
 	}
 	if ( updateCache )
 	{
-		// XXX just always use "git update-index --index-info"?
 		fprintf(fp, "%06o %s\t%s", mode, sha1_to_hex(sha), path);
 		fputc('\0', fp);
 	}
@@ -846,8 +847,8 @@ void updateFile(FILE *fp,
 	updateFileExt(fp, sha, mode, path, index_only || clean, !index_only);
 }
 
-// Low level file merging, update and removal
-// ------------------------------------------
+/* Low level file merging, update and removal */
+
 struct merge_file_info
 {
 	unsigned char sha[20];
@@ -991,9 +992,6 @@ int processRenames(struct rename_entry *
 		   const char *branchNameB)
 {
 	int cleanMerge = 1;
-	//    printf("process renames %s:%s -> %s:%s\n",
-	//	   branchNameA, renamesA ? renamesA->src: "(none)",
-	//	   branchNameB, renamesB ? renamesB->dst: "(none)");
 
 	struct path_list srcNames = {NULL, 0, 0};
 	const struct rename_entry *sre;
@@ -1033,7 +1031,7 @@ int processRenames(struct rename_entry *
 		ren1->processed = 1;
 
 		if ( ren2 ) {
-			// Renamed in 1 and renamed in 2
+			/* Renamed in 1 and renamed in 2 */
 			if ( strcmp(ren1->src, ren2->src) != 0 )
 				die("ren1.srcName != ren2.srcName");
 			ren2->dst_entry->processed = 1;
@@ -1097,7 +1095,7 @@ int processRenames(struct rename_entry *
 				updateFile(fp, mfi.clean, mfi.sha, mfi.mode, ren1->dst);
 			}
 		} else {
-			// Renamed in 1, maybe changed in 2
+			/* Renamed in 1, maybe changed in 2 */
 			removeFile(fp, 1, ren1->src);
 
 			unsigned char srcShaOtherBranch[20], dstShaOtherBranch[20];
@@ -1224,15 +1222,15 @@ static int sha_eq(const unsigned char *a
 	return a && b && memcmp(a, b, 20) == 0;
 }
 
-// Per entry merge function
-// ------------------------
-// Merge one cache entry.
+/* Per entry merge function */
 int processEntry(struct index_entry *entry,
 		 const char *branch1Name,
 		 const char *branch2Name)
 {
-	//    printf("processing entry, clean cache: %s\n", index_only ? "yes": "no");
-	//    print_index_entry("\tpath: ", entry);
+	/*
+	printf("processing entry, clean cache: %s\n", index_only ? "yes": "no");
+	print_index_entry("\tpath: ", entry);
+	*/
 	int cleanMerge = 1;
 	const char *path = entry->path;
 	unsigned char *oSha = has_sha(entry->stages[1].sha);
@@ -1244,18 +1242,17 @@ int processEntry(struct index_entry *ent
 	FILE *fp = git_update_index_pipe();
 
 	if ( oSha && (!aSha || !bSha) ) {
-		//
-		// Case A: Deleted in one
-		//
+		/* Case A: Deleted in one */
 		if ( (!aSha && !bSha) ||
 		     (sha_eq(aSha, oSha) && !bSha) ||
 		     (!aSha && sha_eq(bSha, oSha)) ) {
-			// Deleted in both or deleted in one and unchanged in the other
+			/* Deleted in both or deleted in one and
+			 * unchanged in the other */
 			if ( aSha )
 				output("Removing %s", path);
 			removeFile(fp, 1, path);
 		} else {
-			// Deleted in one and changed in the other
+			/* Deleted in one and changed in the other */
 			cleanMerge = 0;
 			if ( !aSha ) {
 				output("CONFLICT (delete/modify): %s deleted in %s "
@@ -1274,9 +1271,7 @@ int processEntry(struct index_entry *ent
 
 	} else if ( (!oSha && aSha && !bSha) ||
 		    (!oSha && !aSha && bSha) ) {
-		//
-		// Case B: Added in one.
-		//
+		/* Case B: Added in one. */
 		const char *addBranch;
 		const char *otherBranch;
 		unsigned mode;
@@ -1309,9 +1304,7 @@ int processEntry(struct index_entry *ent
 			updateFile(fp, 1, sha, mode, path);
 		}
 	} else if ( !oSha && aSha && bSha ) {
-		//
-		// Case C: Added in both (check for same permissions).
-		//
+		/* Case C: Added in both (check for same permissions). */
 		if ( sha_eq(aSha, bSha) ) {
 			if ( aMode != bMode ) {
 				cleanMerge = 0;
@@ -1321,7 +1314,7 @@ int processEntry(struct index_entry *ent
 				output("CONFLICT: adding with permission: %06o", aMode);
 				updateFile(fp, 0, aSha, aMode, path);
 			} else {
-				// This case is handled by git-read-tree
+				/* This case is handled by git-read-tree */
 				assert(0 && "This case must be handled by git-read-tree");
 			}
 		} else {
@@ -1337,9 +1330,7 @@ int processEntry(struct index_entry *ent
 		}
 
 	} else if ( oSha && aSha && bSha ) {
-		//
-		// case D: Modified in both, but differently.
-		//
+		/* case D: Modified in both, but differently. */
 		output("Auto-merging %s\n", path);
 		struct merge_file_info mfi;
 		mfi = mergeFile(path, oSha, oMode,
@@ -1472,15 +1463,15 @@ struct graph *graph_build(struct node_li
 			break;
 		if (EOF == ch)
 			break;
-		// a commit
+		/* a commit */
 		struct node *node = graph_node_bysha(graph, sha);
 		if (!node)
 		{
 			node = node_alloc(lookup_commit(sha));
 			graph_add_node(graph, node);
 		}
-		// ...and its parents. I assume a parent cannot be mentioned
-		// before the children.
+		/* ...and its parents. I assume a parent cannot be mentioned
+		   before the children. */
 		struct node_list *parents = NULL;
 		while ('\n' != ch) {
 			if (fget_sha1(sha, fp, &ch)) {
@@ -1573,4 +1564,6 @@ int main(int argc, char *argv[])
 	return result.clean ? 0: 1;
 }
 
-// vim: sw=8 noet
+/*
+vim: sw=8 noet
+*/
diff --git a/path-list.c b/path-list.c
index fbfc103..95395ab 100644
--- a/path-list.c
+++ b/path-list.c
@@ -67,7 +67,7 @@ int path_list_has_path(const struct path
 	return exact_match;
 }
 
-// in place
+/* in place */
 void path_list_union_update(struct path_list *dst, const struct path_list *src)
 {
 	char **new_paths;
-- 
1.4.1.rc1.g8b09-dirty


[-- Attachment #3: 0002-fix-processEntries-to-avoid-multiple-calls-to-write-tree-and-update-index.txt --]
[-- Type: text/plain, Size: 5295 bytes --]

From b2fbfc22526fb2a1f56ef56836b7412c53524e60 Mon Sep 17 00:00:00 2001
From: Riesen <ARiesen@oekap852.hbi.ad.harman.com>
Date: Wed, 28 Jun 2006 10:45:05 +0200
Subject: fix processEntries to avoid multiple calls to write-tree and update-index
---
 merge-recursive.c |   47 ++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index ba5b024..5b7ed51 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -19,6 +19,13 @@ #include "run-command.h"
 #include "graph.h"
 #include "path-list.h"
 
+#define DEBUG
+#ifdef DEBUG
+#define debug(args, ...) fprintf(stderr, args, ## __VA_ARGS__)
+#else
+#define debug(args, ...)
+#endif
+
 #define for_each_commit(p,list) for ( p = (list); p; p = p->next )
 
 struct merge_result
@@ -345,9 +352,14 @@ int getFilesAndDirs(struct tree *tree,
 	path_list_clear(dirs, 1);
 	data.files = files;
 	data.dirs = dirs;
-	if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 )
+	debug("getFilesAndDirs ...\n");
+	if ( read_tree_rt(tree, "", 0, save_files_dirs, &data) != 0 ) {
+		debug("\tgetFilesAndDirs done (0)\n");
 		return 0;
-	return path_list_count(files) + path_list_count(dirs);
+	}
+	int n = path_list_count(files) + path_list_count(dirs);
+	debug("\tgetFilesAndDirs done (%d)\n", n);
+	return n;
 }
 
 struct index_entry *index_entry_find(struct index_entry *ents, const char *path)
@@ -478,6 +490,7 @@ struct index_entry *unmergedCacheEntries
 	FILE *fp = popen("git-ls-files -z --unmerged", "r");
 	if ( !fp )
 		return NULL;
+	debug("unmergedCacheEntries...\n");
 	int ch;
 	while ( !feof(fp) ) {
 		unsigned mode;
@@ -524,6 +537,7 @@ struct index_entry *unmergedCacheEntries
 		while ( (ch = fgetc(fp)) != EOF && ch );
 	}
 	pclose(fp);
+	debug("\tunmergedCacheEntries done\n");
 	return unmerged;
 }
 
@@ -595,6 +609,7 @@ struct rename_entry *getRenames(struct t
 				struct tree *bTree,
 				struct index_entry **entries)
 {
+	debug("getRenames ...\n");
 	struct rename_entry *renames = NULL;
 	struct rename_entry **rptr = &renames;
 	struct diff_options opts;
@@ -639,6 +654,7 @@ struct rename_entry *getRenames(struct t
 	}
 	opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 	diff_flush(&opts);
+	debug("\tgetRenames done\n");
 	return renames;
 }
 
@@ -991,6 +1007,7 @@ int processRenames(struct rename_entry *
 		   const char *branchNameA,
 		   const char *branchNameB)
 {
+	debug("processRenames...\n");
 	int cleanMerge = 1;
 
 	struct path_list srcNames = {NULL, 0, 0};
@@ -1207,6 +1224,7 @@ int processRenames(struct rename_entry *
 	if (pclose(fp)) {
 		die("git update-index --index-info failed");
 	}
+	debug("\tprocessRenames done\n");
 	return cleanMerge;
 }
 
@@ -1223,7 +1241,8 @@ static int sha_eq(const unsigned char *a
 }
 
 /* Per entry merge function */
-int processEntry(struct index_entry *entry,
+int processEntry(FILE *fp,
+		 struct index_entry *entry,
 		 const char *branch1Name,
 		 const char *branch2Name)
 {
@@ -1239,7 +1258,6 @@ int processEntry(struct index_entry *ent
 	unsigned oMode = entry->stages[1].mode;
 	unsigned aMode = entry->stages[2].mode;
 	unsigned bMode = entry->stages[3].mode;
-	FILE *fp = git_update_index_pipe();
 
 	if ( oSha && (!aSha || !bSha) ) {
 		/* Case A: Deleted in one */
@@ -1353,8 +1371,6 @@ int processEntry(struct index_entry *ent
 	} else
 		die("Fatal merge failure, shouldn't happen.");
 
-	if (pclose(fp))
-		die("updating entry failed in git update-index");
 	return cleanMerge;
 }
 
@@ -1373,6 +1389,7 @@ struct merge_tree_result merge_trees(str
 		return result;
 	}
 
+	debug("merge_trees ...\n");
 	code = git_merge_trees(index_only ? "-i": "-u", common, head, merge);
 
 	if ( code != 0 )
@@ -1397,17 +1414,22 @@ struct merge_tree_result merge_trees(str
 		renamesMerge = getRenames(merge, common, head, merge, &entries);
 		result.clean = processRenames(renamesHead, renamesMerge,
 					      branch1Name, branch2Name);
+		debug("\tprocessing entries...\n");
+		FILE *fp = git_update_index_pipe();
 		struct index_entry *e;
 		for ( e = entries; e; e = e->next ) {
 			if ( e->processed )
 				continue;
-			if ( !processEntry(e, branch1Name, branch2Name) )
+			if ( !processEntry(fp, e, branch1Name, branch2Name) )
 				result.clean = 0;
-			if ( result.clean || index_only )
-				result.tree = git_write_tree();
-			else
-				result.tree = NULL;
 		}
+		if (pclose(fp))
+			die("updating entry failed in git update-index");
+		if ( result.clean || index_only )
+			result.tree = git_write_tree();
+		else
+			result.tree = NULL;
+		debug("\t\tprocessing entries done\n");
 		free_rename_entries(&renamesMerge);
 		free_rename_entries(&renamesHead);
 		free_index_entries(&entries);
@@ -1419,6 +1441,7 @@ struct merge_tree_result merge_trees(str
 		       sha1_to_hex(result.tree->object.sha1));
 	}
 
+	debug("\tmerge_trees done\n");
 	return result;
 }
 
@@ -1558,7 +1581,9 @@ int main(int argc, char *argv[])
 		struct node_list *commits = NULL;
 		node_list_insert(h1, &commits);
 		node_list_insert(h2, &commits->next);
+		debug("building graph...\n");
 		struct graph *graph = graph_build(commits);
+		debug("\tbuilding graph...\n");
 		result = merge(h1, h2, branch1, branch2, graph, 0, NULL);
 	}
 	return result.clean ? 0: 1;
-- 
1.4.1.rc1.g8b09-dirty


^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-28  9:34     ` Alex Riesen
@ 2006-06-28 10:04       ` Johannes Schindelin
  2006-06-28 11:35         ` Alex Riesen
  2006-06-29  0:25       ` Alex Riesen
  1 sibling, 1 reply; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-28 10:04 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

Hi,

On Wed, 28 Jun 2006, Alex Riesen wrote:

> On 6/28/06, Alex Riesen <fork0@t-online.de> wrote:
> > Johannes Schindelin, Tue, Jun 27, 2006 18:42:51 +0200:
> > > > - use a pipe to "git-update-index --index-info" instead of using command
> > > > line
> 
> ...and to take it a step further, a patch (0002) to avoid too many calls to
> git-write-tree and to git-update-index.

... and introduces a lot more lines doing debug output.

However, the change is good, but I would not call it "FILE *fp". IMHO 
"FILE *update_index_pipe" would explain better what you do there.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-28 10:04       ` Johannes Schindelin
@ 2006-06-28 11:35         ` Alex Riesen
  2006-06-28 12:55           ` Johannes Schindelin
  0 siblings, 1 reply; 32+ messages in thread
From: Alex Riesen @ 2006-06-28 11:35 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

On 6/28/06, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> > > > > - use a pipe to "git-update-index --index-info" instead of using command
> > > > > line
> >
> > ...and to take it a step further, a patch (0002) to avoid too many calls to
> > git-write-tree and to git-update-index.
>
> ... and introduces a lot more lines doing debug output.

well, that's how I found out about what to fix. Was really impressed
when saw the difference :)
What stands out next is getRenames (to be renamed into get_renames),
a little profiling shows that the renames lists are the culprit this time too.

I actually expected these problems, but decided to postpone the
optimization for later: linked lists are comfortable to work with.
I didn't had much time for this project, and the first commit is
dated 7th June - it was a very slow progress.

> However, the change is good, but I would not call it "FILE *fp". IMHO
> "FILE *update_index_pipe" would explain better what you do there.

just update_index would be enough. It can't possibly mean anything else,
being FILE* in that context.

By the way, is it safe to use "git-update-index --index-info"?
AFAICS it was designed for this kind of use, but the most
visible user of it (git-update-recursive.py) didn't use --index-info
this way! Was there any specific reasons?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-28 11:35         ` Alex Riesen
@ 2006-06-28 12:55           ` Johannes Schindelin
  2006-06-28 14:27             ` Alex Riesen
  0 siblings, 1 reply; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-28 12:55 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git

Hi,

On Wed, 28 Jun 2006, Alex Riesen wrote:

> What stands out next is getRenames (to be renamed into get_renames), a 
> little profiling shows that the renames lists are the culprit this time 
> too.

In my attempts, the path_list did not contain paths, but structs 
containing a path and a void pointer. I think I will resurrect this idea 
for the renames.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-28 12:55           ` Johannes Schindelin
@ 2006-06-28 14:27             ` Alex Riesen
  2006-06-28 15:09               ` Johannes Schindelin
  0 siblings, 1 reply; 32+ messages in thread
From: Alex Riesen @ 2006-06-28 14:27 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git

On 6/28/06, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
>
> > What stands out next is getRenames (to be renamed into get_renames), a
> > little profiling shows that the renames lists are the culprit this time
> > too.
>
> In my attempts, the path_list did not contain paths, but structs
> containing a path and a void pointer. I think I will resurrect this idea
> for the renames.
>

What was the pointer for?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-28 14:27             ` Alex Riesen
@ 2006-06-28 15:09               ` Johannes Schindelin
  0 siblings, 0 replies; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-28 15:09 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git

Hi,

On Wed, 28 Jun 2006, Alex Riesen wrote:

> On 6/28/06, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> > 
> > > What stands out next is getRenames (to be renamed into get_renames), a
> > > little profiling shows that the renames lists are the culprit this time
> > > too.
> > 
> > In my attempts, the path_list did not contain paths, but structs
> > containing a path and a void pointer. I think I will resurrect this idea
> > for the renames.
> > 
> 
> What was the pointer for?

We want to emulate the set with a sorted list. The pointer is for the 
payload.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-28  9:34     ` Alex Riesen
  2006-06-28 10:04       ` Johannes Schindelin
@ 2006-06-29  0:25       ` Alex Riesen
  2006-06-29  2:41         ` Junio C Hamano
  1 sibling, 1 reply; 32+ messages in thread
From: Alex Riesen @ 2006-06-29  0:25 UTC (permalink / raw)
  To: git; +Cc: Johannes Schindelin, Junio C Hamano, Fredrik Kuivinen,
	Linus Torvalds

Alex Riesen, Wed, Jun 28, 2006 11:34:23 +0200:
> >> > - use a pipe to "git-update-index --index-info" instead of
> >> > using command line
> 
> ...and to take it a step further, a patch (0002) to avoid too many calls to
> git-write-tree and to git-update-index. Brought merge times on my test
> monster (~25k files) down to 2min 30sec (from something around 11 min).

this broke t6022-merge-rename.sh (the second test). It produces an
index with this:

.../t/trash$ git-diff-index white
:100644 100644 2d603156dc5bdf6295c789cac08e3c9942a0b82a 0000000000000000000000000000000000000000 M      B
:100644 100644 ba41fb96393979b22691106b06bf5231eab57b85 0000000000000000000000000000000000000000 M      N

whereas git-merge-recursive (and the previous version, without pipe):

.../t/trash$ git-diff-index white
:100644 100644 2d603156dc5bdf6295c789cac08e3c9942a0b82a 0000000000000000000000000000000000000000 M      B

I can see that "git update-index --add" is somehow different from a
pipe to "git update-index --index-info", but not very clear. Does this
"zero-sha1" mean that the file "N" is not in the index?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-29  0:25       ` Alex Riesen
@ 2006-06-29  2:41         ` Junio C Hamano
  2006-06-29  8:43           ` Alex Riesen
  0 siblings, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2006-06-29  2:41 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git

fork0@t-online.de (Alex Riesen) writes:

> this broke t6022-merge-rename.sh (the second test). It produces an
> index with this:
>
> .../t/trash$ git-diff-index white
> :100644 100644 2d603156dc5bdf6295c789cac08e3c9942a0b82a 0000000000000000000000000000000000000000 M      B
> :100644 100644 ba41fb96393979b22691106b06bf5231eab57b85 0000000000000000000000000000000000000000 M      N
>
> whereas git-merge-recursive (and the previous version, without pipe):
>
> .../t/trash$ git-diff-index white
> :100644 100644 2d603156dc5bdf6295c789cac08e3c9942a0b82a 0000000000000000000000000000000000000000 M      B
>
> I can see that "git update-index --add" is somehow different from a
> pipe to "git update-index --index-info", but not very clear. Does this
> "zero-sha1" mean that the file "N" is not in the index?

When diff-index and diff-files compare a tree entry or an index
entry with a file in the working tree, they do not compute the
blob hash value for the file in the working tree.  0{40} is used
on the RHS in such a case.  When the working tree file matches
the corresponding index entry, then we know RHS matches what is
in the index, so both sides have the blob hash value.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-29  2:41         ` Junio C Hamano
@ 2006-06-29  8:43           ` Alex Riesen
  2006-06-29 18:40             ` Johannes Schindelin
  0 siblings, 1 reply; 32+ messages in thread
From: Alex Riesen @ 2006-06-29  8:43 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Johannes Schindelin, Fredrik Kuivinen, Linus Torvalds

[cc list restored, I'm lost in the maze of git update-index, all cache
changing functions looking almost the same]

On 6/29/06, Junio C Hamano <junkio@cox.net> wrote:
> > this broke t6022-merge-rename.sh (the second test). It produces an
> > index with this:
> >
> > .../t/trash$ git-diff-index white
> > :100644 100644 2d603156dc5bdf6295c789cac08e3c9942a0b82a 0000000000000000000000000000000000000000 M      B
> > :100644 100644 ba41fb96393979b22691106b06bf5231eab57b85 0000000000000000000000000000000000000000 M      N
> >
> > whereas git-merge-recursive (and the previous version, without pipe):
> >
> > .../t/trash$ git-diff-index white
> > :100644 100644 2d603156dc5bdf6295c789cac08e3c9942a0b82a 0000000000000000000000000000000000000000 M      B
> >
> > I can see that "git update-index --add" is somehow different from a
> > pipe to "git update-index --index-info", but not very clear. Does this
> > "zero-sha1" mean that the file "N" is not in the index?
>
> When diff-index and diff-files compare a tree entry or an index
> entry with a file in the working tree, they do not compute the
> blob hash value for the file in the working tree.  0{40} is used
> on the RHS in such a case.  When the working tree file matches
> the corresponding index entry, then we know RHS matches what is
> in the index, so both sides have the blob hash value.

Ok. Am I correct in the assumption, that if the file in working tree has
the same SHA1 as LHS, than the next "git-update-index --refresh" will
remove the entry from git-diff-index output?
This is what actually happens, if I do "git-update-index --refresh", so I
suspect that I have an SHA1 update gone missing somewhere.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-29  8:43           ` Alex Riesen
@ 2006-06-29 18:40             ` Johannes Schindelin
  2006-06-29 23:28               ` Alex Riesen
  0 siblings, 1 reply; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-29 18:40 UTC (permalink / raw)
  To: Alex Riesen; +Cc: Junio C Hamano, git, Fredrik Kuivinen, Linus Torvalds

Hi,

On Thu, 29 Jun 2006, Alex Riesen wrote:

> [cc list restored, I'm lost in the maze of git update-index, all cache
> changing functions looking almost the same]
> 
> On 6/29/06, Junio C Hamano <junkio@cox.net> wrote:
> > > this broke t6022-merge-rename.sh (the second test). It produces an
> > > index with this:
> > >
> > > .../t/trash$ git-diff-index white
> > > :100644 100644 2d603156dc5bdf6295c789cac08e3c9942a0b82a
> > 0000000000000000000000000000000000000000 M      B
> > > :100644 100644 ba41fb96393979b22691106b06bf5231eab57b85
> > 0000000000000000000000000000000000000000 M      N
> > >
> > > whereas git-merge-recursive (and the previous version, without pipe):
> > >
> > > .../t/trash$ git-diff-index white
> > > :100644 100644 2d603156dc5bdf6295c789cac08e3c9942a0b82a
> > 0000000000000000000000000000000000000000 M      B
> > >
> > > I can see that "git update-index --add" is somehow different from a
> > > pipe to "git update-index --index-info", but not very clear. Does this
> > > "zero-sha1" mean that the file "N" is not in the index?
> > 
> > When diff-index and diff-files compare a tree entry or an index
> > entry with a file in the working tree, they do not compute the
> > blob hash value for the file in the working tree.  0{40} is used
> > on the RHS in such a case.  When the working tree file matches
> > the corresponding index entry, then we know RHS matches what is
> > in the index, so both sides have the blob hash value.
> 
> Ok. Am I correct in the assumption, that if the file in working tree has
> the same SHA1 as LHS, than the next "git-update-index --refresh" will
> remove the entry from git-diff-index output?
> This is what actually happens, if I do "git-update-index --refresh", so I
> suspect that I have an SHA1 update gone missing somewhere.

I think the problem is more like this (in ce_match_stat_basic()):

        if (ce->ce_mtime.sec != htonl(st->st_mtime))
                changed |= MTIME_CHANGED;
        if (ce->ce_ctime.sec != htonl(st->st_ctime))
                changed |= CTIME_CHANGED;

If you update with --index-info, the mtime and ctime is not updated 
from the file in the working directory.

All the more a reason to go forward with direct calls to read_cache() and 
write_cache(). At the moment, my plan is

- trivially split the read_cache() code into read_cache()/read_cache_from(),
- introduce flush_cache(),
- trivially rewrite add_file_to_cache(), and add_cache_info() from
  builtin-update-index.c, move that to read-tree.c, too, and
- throw out the pipe to git-update-index from merge-recursive.c 
  altogether.

This should be less intrusive than it sounds: with the introduction of 
read_cache_from() it should be trivial to handle the problem of different 
index files.

We have to put a lock_file on the index file at the start, and write the 
index file at the end.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-29 18:40             ` Johannes Schindelin
@ 2006-06-29 23:28               ` Alex Riesen
  0 siblings, 0 replies; 32+ messages in thread
From: Alex Riesen @ 2006-06-29 23:28 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Junio C Hamano, git, Fredrik Kuivinen, Linus Torvalds

Johannes Schindelin, Thu, Jun 29, 2006 20:40:46 +0200:
> > > 
> > > When diff-index and diff-files compare a tree entry or an index
> > > entry with a file in the working tree, they do not compute the
> > > blob hash value for the file in the working tree.  0{40} is used
> > > on the RHS in such a case.  When the working tree file matches
> > > the corresponding index entry, then we know RHS matches what is
> > > in the index, so both sides have the blob hash value.
> > 
> > Ok. Am I correct in the assumption, that if the file in working tree has
> > the same SHA1 as LHS, than the next "git-update-index --refresh" will
> > remove the entry from git-diff-index output?
> > This is what actually happens, if I do "git-update-index --refresh", so I
> > suspect that I have an SHA1 update gone missing somewhere.
> 
> I think the problem is more like this (in ce_match_stat_basic()):
> 
>         if (ce->ce_mtime.sec != htonl(st->st_mtime))
>                 changed |= MTIME_CHANGED;
>         if (ce->ce_ctime.sec != htonl(st->st_ctime))
>                 changed |= CTIME_CHANGED;
> 
> If you update with --index-info, the mtime and ctime is not updated 
> from the file in the working directory.

Oh, I see. I was under impression git-diff-index does not use mtime/ctime.
Thought it was just mode+sha between treeish and index.
    $ man git-diff-index
       git-diff-index <tree-ish>
              compares the <tree-ish> and the files on the filesystem.
       git-diff-index --cached <tree-ish>
              compares the <tree-ish> and the index.
So it does compare treeish ad index, but only if explicitly told so.
Pity, there seem to be no way to update the times in --index-info
protocol yet. Maybe it'll be even never needed, after cache functions
gone libraries.

> All the more a reason to go forward with direct calls to read_cache() and 
> write_cache(). At the moment, my plan is
> 
> - trivially split the read_cache() code into read_cache()/read_cache_from(),
> - introduce flush_cache(),
> - trivially rewrite add_file_to_cache(), and add_cache_info() from
>   builtin-update-index.c, move that to read-tree.c, too, and
> - throw out the pipe to git-update-index from merge-recursive.c 
>   altogether.
> 
> This should be less intrusive than it sounds: with the introduction of 
> read_cache_from() it should be trivial to handle the problem of different 
> index files.
> 
> We have to put a lock_file on the index file at the start, and write the 
> index file at the end.
> 

Yes, very nice plan, all by itself :)

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-27 22:32   ` Alex Riesen
  2006-06-28  9:34     ` Alex Riesen
@ 2006-06-29 13:16     ` Johannes Schindelin
  2006-06-29 13:16     ` [PATCH 1/2] refactor merge_bases() as preparation to libify merge-base Johannes Schindelin
  2006-06-29 13:17     ` [PATCH] move get_merge_bases() to core lib; use it in merge-recursive Johannes Schindelin
  3 siblings, 0 replies; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-29 13:16 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

Hi,

On Wed, 28 Jun 2006, Alex Riesen wrote:

> > > Path list optimization should be next (and I'll be glad if someone does 
> > > this before me).
> > 
> > See below.
> > 
> 
> Aah, thanks. Merged, tried, tested, left in patch.
> The reallocs can cause some undesirable heap fragmentation, don't you
> think?

I do not care too deeply about the heap fragmentation there: it is just 4 
bytes per file name. Besides, I chose an increment of 32 (probably I 
should use alloc_nr() instead).

> I tried to replace that code completely with a call to git-merge-base
> (it does not happen too often). So far it passed all tests.
> 
> > I have some commits in a private branch to split out get_merge_bases() 
> > from merge-base.c, so I'll try that next.
> 
> Thanks, that'd be nice.

Will reply with two patches.

> > BTW, before actually finishing this, we might want to do away with 
> > capitalizedFunctionNames and 4-space indent.
> 
> 4-space indent should be gone by now, the names pending (they were
> important in initial debugging after conversion).

Okay.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 1/2] refactor merge_bases() as preparation to libify merge-base
  2006-06-27 22:32   ` Alex Riesen
  2006-06-28  9:34     ` Alex Riesen
  2006-06-29 13:16     ` Johannes Schindelin
@ 2006-06-29 13:16     ` Johannes Schindelin
  2006-06-29 13:17     ` [PATCH] move get_merge_bases() to core lib; use it in merge-recursive Johannes Schindelin
  3 siblings, 0 replies; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-29 13:16 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds


---
 merge-base.c |   64 ++++++++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/merge-base.c b/merge-base.c
index 4856ca0..7d87c20 100644
--- a/merge-base.c
+++ b/merge-base.c
@@ -124,8 +124,6 @@ static struct commit *interesting(struct
  * to contaminate D and E.
  */
 
-static int show_all = 0;
-
 static void mark_reachable_commits(struct commit_list *result,
 				   struct commit_list *list)
 {
@@ -167,34 +165,33 @@ static void mark_reachable_commits(struc
 	}
 }
 
-static int merge_base(struct commit *rev1, struct commit *rev2)
+struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2)
 {
 	struct commit_list *list = NULL;
 	struct commit_list *result = NULL;
 	struct commit_list *tmp = NULL;
 
-	if (rev1 == rev2) {
-		printf("%s\n", sha1_to_hex(rev1->object.sha1));
-		return 0;
-	}
+	if (rev1 == rev2)
+		return commit_list_insert(rev1, &result);
 
 	parse_commit(rev1);
 	parse_commit(rev2);
 
-	rev1->object.flags |= 1;
-	rev2->object.flags |= 2;
+	rev1->object.flags |= PARENT1;
+	rev2->object.flags |= PARENT2;
 	insert_by_date(rev1, &list);
 	insert_by_date(rev2, &list);
 
 	while (interesting(list)) {
 		struct commit *commit = list->item;
 		struct commit_list *parents;
-		int flags = commit->object.flags & 7;
+		int flags = commit->object.flags
+			& (PARENT1 | PARENT2 | UNINTERESTING);
 
 		tmp = list;
 		list = list->next;
 		free(tmp);
-		if (flags == 3) {
+		if (flags == (PARENT1 | PARENT2)) {
 			insert_by_date(commit, &result);
 
 			/* Mark parents of a found merge uninteresting */
@@ -213,21 +210,52 @@ static int merge_base(struct commit *rev
 	}
 
 	if (!result)
-		return 1;
+		return NULL;
 
 	if (result->next && list)
 		mark_reachable_commits(result, list);
 
+	/* cull duplicates */
+	for (tmp = result, list = NULL; tmp; ) {
+		struct commit *commit = tmp->item;
+		struct commit_list *next = tmp->next;
+		if (commit->object.flags & UNINTERESTING) {
+			if (list != NULL)
+				list->next = next;
+			free(tmp);
+		} else {
+			if (list == NULL)
+				result = tmp;
+			list = tmp;
+			commit->object.flags |= UNINTERESTING;
+		}
+
+		tmp = next;
+	}
+
+	/* reset flags */
+	clear_commit_marks(rev1, PARENT1 | PARENT2 | UNINTERESTING);
+	clear_commit_marks(rev2, PARENT1 | PARENT2 | UNINTERESTING);
+
+	return result;
+}
+
+static int show_all = 0;
+
+static int merge_base(struct commit *rev1, struct commit *rev2)
+{
+	struct commit_list *result = get_merge_bases(rev1, rev2);
+
+	if (!result)
+		return 1;
+
 	while (result) {
-		struct commit *commit = result->item;
-		result = result->next;
-		if (commit->object.flags & UNINTERESTING)
-			continue;
-		printf("%s\n", sha1_to_hex(commit->object.sha1));
+		printf("%s\n", sha1_to_hex(result->item->object.sha1));
 		if (!show_all)
 			return 0;
-		commit->object.flags |= UNINTERESTING;
+		result = result->next;
 	}
+
 	return 0;
 }
 
-- 
1.4.1.rc1.g87c00-dirty

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-27 22:32   ` Alex Riesen
                       ` (2 preceding siblings ...)
  2006-06-29 13:16     ` [PATCH 1/2] refactor merge_bases() as preparation to libify merge-base Johannes Schindelin
@ 2006-06-29 13:17     ` Johannes Schindelin
  2006-06-29 13:21       ` Johannes Schindelin
                         ` (2 more replies)
  3 siblings, 3 replies; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-29 13:17 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds


most of this patch is just a "sub-file rename", i.e. moving code
literally (sue me, SCO!) from merge-base.c to commit.c.

---
 commit.c          |  240 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 commit.h          |    2 
 merge-base.c      |  238 -----------------------------------------------------
 merge-recursive.c |   23 ++---
 4 files changed, 250 insertions(+), 253 deletions(-)

diff --git a/commit.c b/commit.c
index 946615d..891c6d0 100644
--- a/commit.c
+++ b/commit.c
@@ -842,3 +842,243 @@ void sort_in_topological_order_fn(struct
 	}
 	free(nodes);
 }
+
+/* merge-rebase stuff */
+
+#define PARENT1 1
+#define PARENT2 2
+#define UNINTERESTING 4
+
+static struct commit *interesting(struct commit_list *list)
+{
+	while (list) {
+		struct commit *commit = list->item;
+		list = list->next;
+		if (commit->object.flags & UNINTERESTING)
+			continue;
+		return commit;
+	}
+	return NULL;
+}
+
+/*
+ * A pathological example of how this thing works.
+ *
+ * Suppose we had this commit graph, where chronologically
+ * the timestamp on the commit are A <= B <= C <= D <= E <= F
+ * and we are trying to figure out the merge base for E and F
+ * commits.
+ *
+ *                  F
+ *                 / \
+ *            E   A   D
+ *             \ /   /  
+ *              B   /
+ *               \ /
+ *                C
+ *
+ * First we push E and F to list to be processed.  E gets bit 1
+ * and F gets bit 2.  The list becomes:
+ *
+ *     list=F(2) E(1), result=empty
+ *
+ * Then we pop F, the newest commit, from the list.  Its flag is 2.
+ * We scan its parents, mark them reachable from the side that F is
+ * reachable from, and push them to the list:
+ *
+ *     list=E(1) D(2) A(2), result=empty
+ *
+ * Next pop E and do the same.
+ *
+ *     list=D(2) B(1) A(2), result=empty
+ *
+ * Next pop D and do the same.
+ *
+ *     list=C(2) B(1) A(2), result=empty
+ *
+ * Next pop C and do the same.
+ *
+ *     list=B(1) A(2), result=empty
+ *
+ * Now it is B's turn.  We mark its parent, C, reachable from B's side,
+ * and push it to the list:
+ *
+ *     list=C(3) A(2), result=empty
+ *
+ * Now pop C and notice it has flags==3.  It is placed on the result list,
+ * and the list now contains:
+ *
+ *     list=A(2), result=C(3)
+ *
+ * We pop A and do the same.
+ * 
+ *     list=B(3), result=C(3)
+ *
+ * Next, we pop B and something very interesting happens.  It has flags==3
+ * so it is also placed on the result list, and its parents are marked
+ * uninteresting, retroactively, and placed back on the list:
+ *
+ *    list=C(7), result=C(7) B(3)
+ * 
+ * Now, list does not have any interesting commit.  So we find the newest
+ * commit from the result list that is not marked uninteresting.  Which is
+ * commit B.
+ *
+ *
+ * Another pathological example how this thing used to fail to mark an
+ * ancestor of a merge base as UNINTERESTING before we introduced the
+ * postprocessing phase (mark_reachable_commits).
+ *
+ *		  2
+ *		  H
+ *	    1    / \
+ *	    G   A   \
+ *	    |\ /     \ 
+ *	    | B       \
+ *	    |  \       \
+ *	     \  C       F
+ *	      \  \     / 
+ *	       \  D   /   
+ *		\ |  /
+ *		 \| /
+ *		  E
+ *
+ *	 list			A B C D E F G H
+ *	 G1 H2			- - - - - - 1 2
+ *	 H2 E1 B1		- 1 - - 1 - 1 2
+ *	 F2 E1 B1 A2		2 1 - - 1 2 1 2
+ *	 E3 B1 A2		2 1 - - 3 2 1 2
+ *	 B1 A2			2 1 - - 3 2 1 2
+ *	 C1 A2			2 1 1 - 3 2 1 2
+ *	 D1 A2			2 1 1 1 3 2 1 2
+ *	 A2			2 1 1 1 3 2 1 2
+ *	 B3			2 3 1 1 3 2 1 2
+ *	 C7			2 3 7 1 3 2 1 2
+ *
+ * At this point, unfortunately, everybody in the list is
+ * uninteresting, so we fail to complete the following two
+ * steps to fully marking uninteresting commits.
+ *
+ *	 D7			2 3 7 7 3 2 1 2
+ *	 E7			2 3 7 7 7 2 1 2
+ *
+ * and we ended up showing E as an interesting merge base.
+ * The postprocessing phase re-injects C and continues traversal
+ * to contaminate D and E.
+ */
+
+static void mark_reachable_commits(struct commit_list *result,
+				   struct commit_list *list)
+{
+	struct commit_list *tmp;
+
+	/*
+	 * Postprocess to fully contaminate the well.
+	 */
+	for (tmp = result; tmp; tmp = tmp->next) {
+		struct commit *c = tmp->item;
+		/* Reinject uninteresting ones to list,
+		 * so we can scan their parents.
+		 */
+		if (c->object.flags & UNINTERESTING)
+			commit_list_insert(c, &list);
+	}
+	while (list) {
+		struct commit *c = list->item;
+		struct commit_list *parents;
+
+		tmp = list;
+		list = list->next;
+		free(tmp);
+
+		/* Anything taken out of the list is uninteresting, so
+		 * mark all its parents uninteresting.  We do not
+		 * parse new ones (we already parsed all the relevant
+		 * ones).
+		 */
+		parents = c->parents;
+		while (parents) {
+			struct commit *p = parents->item;
+			parents = parents->next;
+			if (!(p->object.flags & UNINTERESTING)) {
+				p->object.flags |= UNINTERESTING;
+				commit_list_insert(p, &list);
+			}
+		}
+	}
+}
+
+struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2)
+{
+	struct commit_list *list = NULL;
+	struct commit_list *result = NULL;
+	struct commit_list *tmp = NULL;
+
+	if (rev1 == rev2)
+		return commit_list_insert(rev1, &result);
+
+	parse_commit(rev1);
+	parse_commit(rev2);
+
+	rev1->object.flags |= PARENT1;
+	rev2->object.flags |= PARENT2;
+	insert_by_date(rev1, &list);
+	insert_by_date(rev2, &list);
+
+	while (interesting(list)) {
+		struct commit *commit = list->item;
+		struct commit_list *parents;
+		int flags = commit->object.flags
+			& (PARENT1 | PARENT2 | UNINTERESTING);
+
+		tmp = list;
+		list = list->next;
+		free(tmp);
+		if (flags == (PARENT1 | PARENT2)) {
+			insert_by_date(commit, &result);
+
+			/* Mark parents of a found merge uninteresting */
+			flags |= UNINTERESTING;
+		}
+		parents = commit->parents;
+		while (parents) {
+			struct commit *p = parents->item;
+			parents = parents->next;
+			if ((p->object.flags & flags) == flags)
+				continue;
+			parse_commit(p);
+			p->object.flags |= flags;
+			insert_by_date(p, &list);
+		}
+	}
+
+	if (!result)
+		return NULL;
+
+	if (result->next && list)
+		mark_reachable_commits(result, list);
+
+	/* cull duplicates */
+	for (tmp = result, list = NULL; tmp; ) {
+		struct commit *commit = tmp->item;
+		struct commit_list *next = tmp->next;
+		if (commit->object.flags & UNINTERESTING) {
+			if (list != NULL)
+				list->next = next;
+			free(tmp);
+		} else {
+			if (list == NULL)
+				result = tmp;
+			list = tmp;
+			commit->object.flags |= UNINTERESTING;
+		}
+
+		tmp = next;
+	}
+
+	/* reset flags */
+	clear_commit_marks(rev1, PARENT1 | PARENT2 | UNINTERESTING);
+	clear_commit_marks(rev2, PARENT1 | PARENT2 | UNINTERESTING);
+
+	return result;
+}
diff --git a/commit.h b/commit.h
index 7c9ca3f..89b9dad 100644
--- a/commit.h
+++ b/commit.h
@@ -105,4 +105,6 @@ struct commit_graft *read_graft_line(cha
 int register_commit_graft(struct commit_graft *, int);
 int read_graft_file(const char *graft_file);
 
+extern struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2);
+
 #endif /* COMMIT_H */
diff --git a/merge-base.c b/merge-base.c
index 7d87c20..b41f76c 100644
--- a/merge-base.c
+++ b/merge-base.c
@@ -2,244 +2,6 @@ #include <stdlib.h>
 #include "cache.h"
 #include "commit.h"
 
-#define PARENT1 1
-#define PARENT2 2
-#define UNINTERESTING 4
-
-static struct commit *interesting(struct commit_list *list)
-{
-	while (list) {
-		struct commit *commit = list->item;
-		list = list->next;
-		if (commit->object.flags & UNINTERESTING)
-			continue;
-		return commit;
-	}
-	return NULL;
-}
-
-/*
- * A pathological example of how this thing works.
- *
- * Suppose we had this commit graph, where chronologically
- * the timestamp on the commit are A <= B <= C <= D <= E <= F
- * and we are trying to figure out the merge base for E and F
- * commits.
- *
- *                  F
- *                 / \
- *            E   A   D
- *             \ /   /  
- *              B   /
- *               \ /
- *                C
- *
- * First we push E and F to list to be processed.  E gets bit 1
- * and F gets bit 2.  The list becomes:
- *
- *     list=F(2) E(1), result=empty
- *
- * Then we pop F, the newest commit, from the list.  Its flag is 2.
- * We scan its parents, mark them reachable from the side that F is
- * reachable from, and push them to the list:
- *
- *     list=E(1) D(2) A(2), result=empty
- *
- * Next pop E and do the same.
- *
- *     list=D(2) B(1) A(2), result=empty
- *
- * Next pop D and do the same.
- *
- *     list=C(2) B(1) A(2), result=empty
- *
- * Next pop C and do the same.
- *
- *     list=B(1) A(2), result=empty
- *
- * Now it is B's turn.  We mark its parent, C, reachable from B's side,
- * and push it to the list:
- *
- *     list=C(3) A(2), result=empty
- *
- * Now pop C and notice it has flags==3.  It is placed on the result list,
- * and the list now contains:
- *
- *     list=A(2), result=C(3)
- *
- * We pop A and do the same.
- * 
- *     list=B(3), result=C(3)
- *
- * Next, we pop B and something very interesting happens.  It has flags==3
- * so it is also placed on the result list, and its parents are marked
- * uninteresting, retroactively, and placed back on the list:
- *
- *    list=C(7), result=C(7) B(3)
- * 
- * Now, list does not have any interesting commit.  So we find the newest
- * commit from the result list that is not marked uninteresting.  Which is
- * commit B.
- *
- *
- * Another pathological example how this thing used to fail to mark an
- * ancestor of a merge base as UNINTERESTING before we introduced the
- * postprocessing phase (mark_reachable_commits).
- *
- *		  2
- *		  H
- *	    1    / \
- *	    G   A   \
- *	    |\ /     \ 
- *	    | B       \
- *	    |  \       \
- *	     \  C       F
- *	      \  \     / 
- *	       \  D   /   
- *		\ |  /
- *		 \| /
- *		  E
- *
- *	 list			A B C D E F G H
- *	 G1 H2			- - - - - - 1 2
- *	 H2 E1 B1		- 1 - - 1 - 1 2
- *	 F2 E1 B1 A2		2 1 - - 1 2 1 2
- *	 E3 B1 A2		2 1 - - 3 2 1 2
- *	 B1 A2			2 1 - - 3 2 1 2
- *	 C1 A2			2 1 1 - 3 2 1 2
- *	 D1 A2			2 1 1 1 3 2 1 2
- *	 A2			2 1 1 1 3 2 1 2
- *	 B3			2 3 1 1 3 2 1 2
- *	 C7			2 3 7 1 3 2 1 2
- *
- * At this point, unfortunately, everybody in the list is
- * uninteresting, so we fail to complete the following two
- * steps to fully marking uninteresting commits.
- *
- *	 D7			2 3 7 7 3 2 1 2
- *	 E7			2 3 7 7 7 2 1 2
- *
- * and we ended up showing E as an interesting merge base.
- * The postprocessing phase re-injects C and continues traversal
- * to contaminate D and E.
- */
-
-static void mark_reachable_commits(struct commit_list *result,
-				   struct commit_list *list)
-{
-	struct commit_list *tmp;
-
-	/*
-	 * Postprocess to fully contaminate the well.
-	 */
-	for (tmp = result; tmp; tmp = tmp->next) {
-		struct commit *c = tmp->item;
-		/* Reinject uninteresting ones to list,
-		 * so we can scan their parents.
-		 */
-		if (c->object.flags & UNINTERESTING)
-			commit_list_insert(c, &list);
-	}
-	while (list) {
-		struct commit *c = list->item;
-		struct commit_list *parents;
-
-		tmp = list;
-		list = list->next;
-		free(tmp);
-
-		/* Anything taken out of the list is uninteresting, so
-		 * mark all its parents uninteresting.  We do not
-		 * parse new ones (we already parsed all the relevant
-		 * ones).
-		 */
-		parents = c->parents;
-		while (parents) {
-			struct commit *p = parents->item;
-			parents = parents->next;
-			if (!(p->object.flags & UNINTERESTING)) {
-				p->object.flags |= UNINTERESTING;
-				commit_list_insert(p, &list);
-			}
-		}
-	}
-}
-
-struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2)
-{
-	struct commit_list *list = NULL;
-	struct commit_list *result = NULL;
-	struct commit_list *tmp = NULL;
-
-	if (rev1 == rev2)
-		return commit_list_insert(rev1, &result);
-
-	parse_commit(rev1);
-	parse_commit(rev2);
-
-	rev1->object.flags |= PARENT1;
-	rev2->object.flags |= PARENT2;
-	insert_by_date(rev1, &list);
-	insert_by_date(rev2, &list);
-
-	while (interesting(list)) {
-		struct commit *commit = list->item;
-		struct commit_list *parents;
-		int flags = commit->object.flags
-			& (PARENT1 | PARENT2 | UNINTERESTING);
-
-		tmp = list;
-		list = list->next;
-		free(tmp);
-		if (flags == (PARENT1 | PARENT2)) {
-			insert_by_date(commit, &result);
-
-			/* Mark parents of a found merge uninteresting */
-			flags |= UNINTERESTING;
-		}
-		parents = commit->parents;
-		while (parents) {
-			struct commit *p = parents->item;
-			parents = parents->next;
-			if ((p->object.flags & flags) == flags)
-				continue;
-			parse_commit(p);
-			p->object.flags |= flags;
-			insert_by_date(p, &list);
-		}
-	}
-
-	if (!result)
-		return NULL;
-
-	if (result->next && list)
-		mark_reachable_commits(result, list);
-
-	/* cull duplicates */
-	for (tmp = result, list = NULL; tmp; ) {
-		struct commit *commit = tmp->item;
-		struct commit_list *next = tmp->next;
-		if (commit->object.flags & UNINTERESTING) {
-			if (list != NULL)
-				list->next = next;
-			free(tmp);
-		} else {
-			if (list == NULL)
-				result = tmp;
-			list = tmp;
-			commit->object.flags |= UNINTERESTING;
-		}
-
-		tmp = next;
-	}
-
-	/* reset flags */
-	clear_commit_marks(rev1, PARENT1 | PARENT2 | UNINTERESTING);
-	clear_commit_marks(rev2, PARENT1 | PARENT2 | UNINTERESTING);
-
-	return result;
-}
-
 static int show_all = 0;
 
 static int merge_base(struct commit *rev1, struct commit *rev2)
diff --git a/merge-recursive.c b/merge-recursive.c
index 9bbb426..c0de38a 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -186,22 +186,15 @@ struct merge_result merge(struct node *h
 		node_list_insert(ancestor, &ca);
 	else {
 		struct node_list **pca = &ca;
-		char cmd[100];
-		sprintf(cmd, "git-merge-base --all %s %s",
-			node_hex_sha1(h1),
-			node_hex_sha1(h2));
-		FILE *fp = popen(cmd, "r");
-		while (!feof(fp)) {
-			unsigned char sha1[20];
-			int ch;
-			if (fget_sha1(sha1, fp, &ch) == 0) {
-				struct node *n;
-				n = node_alloc(lookup_commit(sha1));
-				node_list_insert(n, pca);
-				pca = &(*pca)->next;
-			}
+		struct commit_list *iter, *merge_bases
+			= get_merge_bases(h1->commit, h2->commit);
+
+		for (iter = merge_bases; iter; iter = iter->next) {
+			struct node *n = node_alloc(iter->item);
+			node_list_insert(n, pca);
+			pca = &(*pca)->next;
 		}
-		pclose(fp);
+		free_commit_list(merge_bases);
 	}
 
 	output("found %u common ancestor(s):", node_list_count(ca));
-- 
1.4.1.rc1.g87c00-dirty

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 13:17     ` [PATCH] move get_merge_bases() to core lib; use it in merge-recursive Johannes Schindelin
@ 2006-06-29 13:21       ` Johannes Schindelin
  2006-06-29 14:12         ` Alex Riesen
  2006-06-29 14:13       ` Alex Riesen
  2006-06-29 14:14       ` Alex Riesen
  2 siblings, 1 reply; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-29 13:21 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

Hi,

Aargh! Of course this is [PATCH 2/2]. BTW, no signoff, since the whole 
merge-recursive is not mergeable yet (passes the tests, but we have a 
small way to go).

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 13:21       ` Johannes Schindelin
@ 2006-06-29 14:12         ` Alex Riesen
  2006-06-29 16:14           ` Johannes Schindelin
  0 siblings, 1 reply; 32+ messages in thread
From: Alex Riesen @ 2006-06-29 14:12 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

On 6/29/06, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> Aargh! Of course this is [PATCH 2/2]. BTW, no signoff, since the whole
> merge-recursive is not mergeable yet (passes the tests, but we have a
> small way to go).

How did you get it to pass the tests? Maybe you still have git-merge-recursive
as default merge strategy?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 14:12         ` Alex Riesen
@ 2006-06-29 16:14           ` Johannes Schindelin
  0 siblings, 0 replies; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-29 16:14 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

Hi,

On Thu, 29 Jun 2006, Alex Riesen wrote:

> On 6/29/06, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> > Aargh! Of course this is [PATCH 2/2]. BTW, no signoff, since the whole
> > merge-recursive is not mergeable yet (passes the tests, but we have a
> > small way to go).
> 
> How did you get it to pass the tests? Maybe you still have git-merge-recursive
> as default merge strategy?

Darn. Yes.

Sorry for the noise.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 13:17     ` [PATCH] move get_merge_bases() to core lib; use it in merge-recursive Johannes Schindelin
  2006-06-29 13:21       ` Johannes Schindelin
@ 2006-06-29 14:13       ` Alex Riesen
  2006-06-29 14:14       ` Alex Riesen
  2 siblings, 0 replies; 32+ messages in thread
From: Alex Riesen @ 2006-06-29 14:13 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

On 6/29/06, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
>
> most of this patch is just a "sub-file rename", i.e. moving code
> literally (sue me, SCO!) from merge-base.c to commit.c.
>

Aah, thanks! Will try it later today.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 13:17     ` [PATCH] move get_merge_bases() to core lib; use it in merge-recursive Johannes Schindelin
  2006-06-29 13:21       ` Johannes Schindelin
  2006-06-29 14:13       ` Alex Riesen
@ 2006-06-29 14:14       ` Alex Riesen
  2006-06-29 16:14         ` Johannes Schindelin
  2 siblings, 1 reply; 32+ messages in thread
From: Alex Riesen @ 2006-06-29 14:14 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

On 6/29/06, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
>
> most of this patch is just a "sub-file rename", i.e. moving code
> literally (sue me, SCO!) from merge-base.c to commit.c.
>

BTW, you probably want to post merge-recursive.c changes separately.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 14:14       ` Alex Riesen
@ 2006-06-29 16:14         ` Johannes Schindelin
  2006-06-29 18:26           ` Linus Torvalds
  2006-06-29 18:39           ` Junio C Hamano
  0 siblings, 2 replies; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-29 16:14 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Linus Torvalds

Hi,

On Thu, 29 Jun 2006, Alex Riesen wrote:

> On 6/29/06, Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> > 
> > most of this patch is just a "sub-file rename", i.e. moving code
> > literally (sue me, SCO!) from merge-base.c to commit.c.
> > 
> 
> BTW, you probably want to post merge-recursive.c changes separately.

My point being: it makes no sense to split off get_merge_bases() if nobody 
uses it except for git-merge-base.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 16:14         ` Johannes Schindelin
@ 2006-06-29 18:26           ` Linus Torvalds
  2006-06-29 18:39           ` Junio C Hamano
  1 sibling, 0 replies; 32+ messages in thread
From: Linus Torvalds @ 2006-06-29 18:26 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Alex Riesen, git, Junio C Hamano, Fredrik Kuivinen

On Thu, 29 Jun 2006, Johannes Schindelin wrote:
> 
> My point being: it makes no sense to split off get_merge_bases() if nobody 
> uses it except for git-merge-base.

I disagree. Your get_merge_bases() function is another small step in 
cleanup and libification, so I think it's perfectly valid regardless of 
whether somebody uses it yet.

A lot of the libification effort ends up being preparatory, because often 
you need to get past a hump of libifying a _lot_ in order to actually use 
something in a library.

So at first, the only user is usually the old separate program. So you 
libify get_merge_bases, and initially the only user is git-merge-base, 
which could now be made a built-in, since the bulk of its code is going to 
be linked in regardless..

		Linus

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 16:14         ` Johannes Schindelin
  2006-06-29 18:26           ` Linus Torvalds
@ 2006-06-29 18:39           ` Junio C Hamano
  2006-06-29 19:06             ` Johannes Schindelin
  1 sibling, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2006-06-29 18:39 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> My point being: it makes no sense to split off get_merge_bases() if nobody 
> uses it except for git-merge-base.

I do not think that is a good reasoning.  If something is
reusable (or you made it reusable) and you are planning to reuse
it later, splitting it out without changing anything else to
make sure the split is correct is a seemingly small but a very
important step.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 18:39           ` Junio C Hamano
@ 2006-06-29 19:06             ` Johannes Schindelin
  2006-06-29 20:58               ` Junio C Hamano
  2006-06-30  0:27               ` Alex Riesen
  0 siblings, 2 replies; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-29 19:06 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Hi Linus, Hi Junio

[this is a response to both of your responses; mail threads cannot yet be 
merged a la git ;-)]

On Thu, 29 Jun 2006, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> > My point being: it makes no sense to split off get_merge_bases() if nobody 
> > uses it except for git-merge-base.
> 
> I do not think that is a good reasoning.  If something is
> reusable (or you made it reusable) and you are planning to reuse
> it later, splitting it out without changing anything else to
> make sure the split is correct is a seemingly small but a very
> important step.

Okay. Convinced.

I tested my patch again, and like Alex said, a test fails. But I tested on 
top of Alex's latest merge-recursive patch, which has that nasty 
update-index bug, and that is the reason for the test to fail.

So, a few tests later, I am pretty sure that my patches do not break 
git-merge-base. I'll prepare another patch series which builds-in 
merge-base.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 19:06             ` Johannes Schindelin
@ 2006-06-29 20:58               ` Junio C Hamano
  2006-06-30  0:27               ` Alex Riesen
  1 sibling, 0 replies; 32+ messages in thread
From: Junio C Hamano @ 2006-06-29 20:58 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> So, a few tests later, I am pretty sure that my patches do not break 
> git-merge-base. I'll prepare another patch series which builds-in 
> merge-base.

I'll take your two patches to refactor merge-base and to move
the bulk of the code to commit.c for now.  Updated "next" coming
shortly.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH] move get_merge_bases() to core lib; use it in merge-recursive
  2006-06-29 19:06             ` Johannes Schindelin
  2006-06-29 20:58               ` Junio C Hamano
@ 2006-06-30  0:27               ` Alex Riesen
  1 sibling, 0 replies; 32+ messages in thread
From: Alex Riesen @ 2006-06-30  0:27 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Junio C Hamano, git

Johannes Schindelin, Thu, Jun 29, 2006 21:06:56 +0200:
> 
> Okay. Convinced.
> 
> I tested my patch again, and like Alex said, a test fails. But I tested on 
> top of Alex's latest merge-recursive patch, which has that nasty 
> update-index bug, and that is the reason for the test to fail.
> 
> So, a few tests later, I am pretty sure that my patches do not break 
> git-merge-base. I'll prepare another patch series which builds-in 
> merge-base.
> 

Just for reference, I'll repost all my patches again, somewhat
consolidated. Your merge_base patches can be applied on top of that
series.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-27 15:48 CFT: merge-recursive in C (updated) Alex Riesen
  2006-06-27 16:42 ` Johannes Schindelin
@ 2006-06-27 18:22 ` Linus Torvalds
  2006-06-27 18:45   ` Johannes Schindelin
  1 sibling, 1 reply; 32+ messages in thread
From: Linus Torvalds @ 2006-06-27 18:22 UTC (permalink / raw)
  To: Alex Riesen; +Cc: git, Junio C Hamano, Fredrik Kuivinen, Johannes Schindelin

On Tue, 27 Jun 2006, Alex Riesen wrote:
> 
> Good news is that it is faster: 6min vs 10min. Bad news is that it is still
> not enough for me and it is only on Linux (Windows will be slower,
> still testing),
> uses an awful lot of memory and CPU.

Why do you do that horrible node_list, and the broken "find common 
ancestors?"

I can't follow your code, but it _looks_ like you are using some totally 
broken graph walking function.

For git, the #1 optimization ALWAYS is to avoid walking the full commit 
graph. That fundamentally _cannot_ scale. 

Almost all of the "hard work" in git has been to try to read the minimum 
amount of commits possible. That means that you absolutely must not just 
walk the commits the "obvious" way, because that will always require you 
to build up the whole graph of the whole history, even if the common 
shared point is much closer.

So it look slike your "graph.c" is _fundamentally_ flawed.

You need to really read "git-merge-base.c" and understand it thoroughly. 
And then you need to throw away your graph.c, and use git-merge-base 
instead.

			Linus

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-27 18:22 ` CFT: merge-recursive in C (updated) Linus Torvalds
@ 2006-06-27 18:45   ` Johannes Schindelin
  2006-06-27 19:10     ` Linus Torvalds
  0 siblings, 1 reply; 32+ messages in thread
From: Johannes Schindelin @ 2006-06-27 18:45 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Alex Riesen, git, Junio C Hamano, Fredrik Kuivinen

Hi,

On Tue, 27 Jun 2006, Linus Torvalds wrote:

> 
> 
> On Tue, 27 Jun 2006, Alex Riesen wrote:
> > 
> > Good news is that it is faster: 6min vs 10min. Bad news is that it is still
> > not enough for me and it is only on Linux (Windows will be slower,
> > still testing),
> > uses an awful lot of memory and CPU.
> 
> Why do you do that horrible node_list, and the broken "find common 
> ancestors?"
> 
> I can't follow your code, but it _looks_ like you are using some totally 
> broken graph walking function.

Note that all Alex does was to translate the Python code to C. And in 
Python, this horrible graph thing was necessary, because git is not 
libified yet.

HOWEVER, I think it is a very good start. It _works_, albeit slow, and we 
have test cases in place to make sure that our wonderful optimizations do 
not break the tool.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-27 18:45   ` Johannes Schindelin
@ 2006-06-27 19:10     ` Linus Torvalds
  2006-06-27 22:36       ` Alex Riesen
  0 siblings, 1 reply; 32+ messages in thread
From: Linus Torvalds @ 2006-06-27 19:10 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Alex Riesen, git, Junio C Hamano, Fredrik Kuivinen

On Tue, 27 Jun 2006, Johannes Schindelin wrote:
> 
> Note that all Alex does was to translate the Python code to C. And in 
> Python, this horrible graph thing was necessary, because git is not 
> libified yet.

Ahh. I thought the Python thing used an external program to do all graph 
calculations.

One thing to look out for is (as Junio mentioned) that you do often want 
to merge things with externally controlled "history": either by having 
something like cherry-pick/rebase give fake history information, or by 
manually forcing a certain merge-base (possibly even an empty tree) in 
order to generate a merge of two unrelated histories.

> HOWEVER, I think it is a very good start. It _works_, albeit slow, and we 
> have test cases in place to make sure that our wonderful optimizations do 
> not break the tool.

Yeah, once it's all in C, it's going to be easier to move functionality 
around incrementally.

		Linus

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: CFT: merge-recursive in C (updated)
  2006-06-27 19:10     ` Linus Torvalds
@ 2006-06-27 22:36       ` Alex Riesen
  0 siblings, 0 replies; 32+ messages in thread
From: Alex Riesen @ 2006-06-27 22:36 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Johannes Schindelin, git, Junio C Hamano, Fredrik Kuivinen

Linus Torvalds, Tue, Jun 27, 2006 21:10:07 +0200:
> > HOWEVER, I think it is a very good start. It _works_, albeit slow, and we 
> > have test cases in place to make sure that our wonderful optimizations do 
> > not break the tool.
> 
> Yeah, once it's all in C, it's going to be easier to move functionality 
> around incrementally.
> 

That's why I started it: it'll eventually become so easy to move
functionality around, that it will move all by itself and optimizes
in a perfect code.

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2006-06-30  0:27 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-06-27 15:48 CFT: merge-recursive in C (updated) Alex Riesen
2006-06-27 16:42 ` Johannes Schindelin
2006-06-27 22:32   ` Alex Riesen
2006-06-28  9:34     ` Alex Riesen
2006-06-28 10:04       ` Johannes Schindelin
2006-06-28 11:35         ` Alex Riesen
2006-06-28 12:55           ` Johannes Schindelin
2006-06-28 14:27             ` Alex Riesen
2006-06-28 15:09               ` Johannes Schindelin
2006-06-29  0:25       ` Alex Riesen
2006-06-29  2:41         ` Junio C Hamano
2006-06-29  8:43           ` Alex Riesen
2006-06-29 18:40             ` Johannes Schindelin
2006-06-29 23:28               ` Alex Riesen
2006-06-29 13:16     ` Johannes Schindelin
2006-06-29 13:16     ` [PATCH 1/2] refactor merge_bases() as preparation to libify merge-base Johannes Schindelin
2006-06-29 13:17     ` [PATCH] move get_merge_bases() to core lib; use it in merge-recursive Johannes Schindelin
2006-06-29 13:21       ` Johannes Schindelin
2006-06-29 14:12         ` Alex Riesen
2006-06-29 16:14           ` Johannes Schindelin
2006-06-29 14:13       ` Alex Riesen
2006-06-29 14:14       ` Alex Riesen
2006-06-29 16:14         ` Johannes Schindelin
2006-06-29 18:26           ` Linus Torvalds
2006-06-29 18:39           ` Junio C Hamano
2006-06-29 19:06             ` Johannes Schindelin
2006-06-29 20:58               ` Junio C Hamano
2006-06-30  0:27               ` Alex Riesen
2006-06-27 18:22 ` CFT: merge-recursive in C (updated) Linus Torvalds
2006-06-27 18:45   ` Johannes Schindelin
2006-06-27 19:10     ` Linus Torvalds
2006-06-27 22:36       ` Alex Riesen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).