[WIP PATCH 0/3] implement merge strategy for submodule links

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [WIP PATCH 0/3] implement merge strategy for submodule links
@ 2010-06-11 12:23 Heiko Voigt
  2010-06-11 12:23 ` [WIP PATCH 1/3] extend ref iteration for submodules Heiko Voigt
                   ` (3 more replies)
  0 siblings, 4 replies; 32+ messages in thread
From: Heiko Voigt @ 2010-06-11 12:23 UTC (permalink / raw)
  To: git

The following patch series is a work in progress. The idea is whenever
you need to merge two SHA1's of a submodule we search for a ref in the
submodule which already contains both. If one such ref exists the
resulting SHA1 is the one pointed at by that ref.

The implementation currently searches through all refs and if one (and
only one) ref exists which contains both sides it merges. In all other
cases it fails.

Future Plans:

  * Only search stable branches. E.g. by default only master and
    */master. The stable branch list will be configurable.

  * Use read_gitfile_gently for submodule .git handling

  * Use strbuf in git_path_submodule

Further comments or ideas?

The series can also be found on github:

http://github.com/hvoigt/git/tree/hv/submodule_merge

cheers Heiko

Heiko Voigt (3):
  extend ref iteration for submodules
  add missing && to submodule-merge testcase
  implement automatic fast forward merge for submodules

 cache.h                    |    3 +
 merge-recursive.c          |    9 ++-
 path.c                     |   21 ++++++++
 refs.c                     |   89 +++++++++++++++++++++++---------
 refs.h                     |    2 +
 submodule.c                |  121 ++++++++++++++++++++++++++++++++++++++++++++
 submodule.h                |    2 +
 t/t7405-submodule-merge.sh |  117 ++++++++++++++++++++++++++++++++++++++++---
 8 files changed, 328 insertions(+), 36 deletions(-)

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [WIP PATCH 1/3] extend ref iteration for submodules
  2010-06-11 12:23 [WIP PATCH 0/3] implement merge strategy for submodule links Heiko Voigt
@ 2010-06-11 12:23 ` Heiko Voigt
  2010-06-11 12:23 ` [WIP PATCH 2/3] add missing && to submodule-merge testcase Heiko Voigt
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 32+ messages in thread
From: Heiko Voigt @ 2010-06-11 12:23 UTC (permalink / raw)
  To: git

This is useful to interrogate a submodule from the main repository.

Signed-off-by: Heiko Voigt <hvoigt@hvoigt.net>
---
 cache.h |    3 ++
 path.c  |   21 +++++++++++++++
 refs.c  |   89 ++++++++++++++++++++++++++++++++++++++++++++------------------
 refs.h  |    2 +
 4 files changed, 89 insertions(+), 26 deletions(-)

diff --git a/cache.h b/cache.h
index c966023..daae839 100644
--- a/cache.h
+++ b/cache.h
@@ -621,6 +621,9 @@ extern char *git_pathdup(const char *fmt, ...)
 /* Return a statically allocated filename matching the sha1 signature */
 extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
 extern char *git_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+extern char *git_path_submodule(const char *path, const char *fmt, ...)
+	__attribute__((format (printf, 2, 3)));
+
 extern char *sha1_file_name(const unsigned char *sha1);
 extern char *sha1_pack_name(const unsigned char *sha1);
 extern char *sha1_pack_index_name(const unsigned char *sha1);
diff --git a/path.c b/path.c
index b4c8d91..47118bc 100644
--- a/path.c
+++ b/path.c
@@ -122,6 +122,27 @@ char *git_path(const char *fmt, ...)
 	return cleanup_path(pathname);
 }
 
+char *git_path_submodule(const char *path, const char *fmt, ...)
+{
+	char *pathname = get_pathname();
+	va_list args;
+	unsigned len;
+
+	len = strlen(path);
+	if (len > PATH_MAX-100)
+		return bad_path;
+	memcpy(pathname, path, len);
+	if (len && path[len-1] != '/')
+		pathname[len++] = '/';
+	memcpy(pathname + len, ".git/", 5);
+	len += 5;
+	va_start(args, fmt);
+	len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
+	va_end(args);
+	if (len >= PATH_MAX)
+		return bad_path;
+	return cleanup_path(pathname);
+}
 
 /* git_mkstemp() - create tmp file honoring TMPDIR variable */
 int git_mkstemp(char *path, size_t len, const char *template)
diff --git a/refs.c b/refs.c
index d3db15a..eed8a29 100644
--- a/refs.c
+++ b/refs.c
@@ -157,6 +157,7 @@ static struct cached_refs {
 	char did_packed;
 	struct ref_list *loose;
 	struct ref_list *packed;
+	const char *submodule;
 } cached_refs;
 static struct ref_list *current_ref;
 
@@ -229,10 +230,17 @@ void clear_extra_refs(void)
 	extra_refs = NULL;
 }
 
-static struct ref_list *get_packed_refs(void)
+static struct ref_list *get_packed_refs(const char *submodule)
 {
+	const char *packed_refs_file;
+
+	if (submodule)
+		packed_refs_file = git_path_submodule(submodule, "packed-refs");
+	else
+		packed_refs_file = git_path("packed-refs");
+
 	if (!cached_refs.did_packed) {
-		FILE *f = fopen(git_path("packed-refs"), "r");
+		FILE *f = fopen(packed_refs_file, "r");
 		cached_refs.packed = NULL;
 		if (f) {
 			read_packed_refs(f, &cached_refs);
@@ -243,9 +251,19 @@ static struct ref_list *get_packed_refs(void)
 	return cached_refs.packed;
 }
 
-static struct ref_list *get_ref_dir(const char *base, struct ref_list *list)
+static struct ref_list *get_ref_dir(const char *submodule, const char *base,
+				    struct ref_list *list)
 {
-	DIR *dir = opendir(git_path("%s", base));
+	DIR *dir;
+	const char *path;
+
+	if (submodule)
+		path = git_path_submodule(submodule, "%s", base);
+	else
+		path = git_path("%s", base);
+
+
+	dir = opendir(path);
 
 	if (dir) {
 		struct dirent *de;
@@ -261,6 +279,7 @@ static struct ref_list *get_ref_dir(const char *base, struct ref_list *list)
 			struct stat st;
 			int flag;
 			int namelen;
+			const char *refdir;
 
 			if (de->d_name[0] == '.')
 				continue;
@@ -270,16 +289,27 @@ static struct ref_list *get_ref_dir(const char *base, struct ref_list *list)
 			if (has_extension(de->d_name, ".lock"))
 				continue;
 			memcpy(ref + baselen, de->d_name, namelen+1);
-			if (stat(git_path("%s", ref), &st) < 0)
+			refdir = submodule
+				? git_path_submodule(submodule, "%s", ref)
+				: git_path("%s", ref);
+			if (stat(refdir, &st) < 0)
 				continue;
 			if (S_ISDIR(st.st_mode)) {
-				list = get_ref_dir(ref, list);
+				list = get_ref_dir(submodule, ref, list);
 				continue;
 			}
-			if (!resolve_ref(ref, sha1, 1, &flag)) {
+			if (submodule) {
 				hashclr(sha1);
-				flag |= REF_BROKEN;
-			}
+				flag = 0;
+				if (!resolve_gitlink_ref(submodule, ref, sha1)) {
+				//	hashclr(sha1);
+					flag |= REF_BROKEN;
+				}
+			} else
+				if (!resolve_ref(ref, sha1, 1, &flag)) {
+					hashclr(sha1);
+					flag |= REF_BROKEN;
+				}
 			list = add_ref(ref, sha1, flag, list, NULL);
 		}
 		free(ref);
@@ -318,11 +348,12 @@ void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname)
 	for_each_rawref(warn_if_dangling_symref, &data);
 }
 
-static struct ref_list *get_loose_refs(void)
+static struct ref_list *get_loose_refs(const char *submodule)
 {
-	if (!cached_refs.did_loose) {
-		cached_refs.loose = get_ref_dir("refs", NULL);
+	if (!cached_refs.did_loose || cached_refs.submodule != submodule) {
+		cached_refs.loose = get_ref_dir(submodule, "refs", NULL);
 		cached_refs.did_loose = 1;
+		cached_refs.submodule = submodule;
 	}
 	return cached_refs.loose;
 }
@@ -455,7 +486,7 @@ const char *resolve_ref(const char *ref, unsigned char *sha1, int reading, int *
 		git_snpath(path, sizeof(path), "%s", ref);
 		/* Special case: non-existing file. */
 		if (lstat(path, &st) < 0) {
-			struct ref_list *list = get_packed_refs();
+			struct ref_list *list = get_packed_refs(NULL);
 			while (list) {
 				if (!strcmp(ref, list->name)) {
 					hashcpy(sha1, list->sha1);
@@ -584,7 +615,7 @@ int peel_ref(const char *ref, unsigned char *sha1)
 		return -1;
 
 	if ((flag & REF_ISPACKED)) {
-		struct ref_list *list = get_packed_refs();
+		struct ref_list *list = get_packed_refs(NULL);
 
 		while (list) {
 			if (!strcmp(list->name, ref)) {
@@ -611,12 +642,12 @@ fallback:
 	return -1;
 }
 
-static int do_for_each_ref(const char *base, each_ref_fn fn, int trim,
-			   int flags, void *cb_data)
+static int do_for_each_ref(const char *submodule, const char *base, each_ref_fn fn,
+			   int trim, int flags, void *cb_data)
 {
 	int retval = 0;
-	struct ref_list *packed = get_packed_refs();
-	struct ref_list *loose = get_loose_refs();
+	struct ref_list *packed = get_packed_refs(submodule);
+	struct ref_list *loose = get_loose_refs(submodule);
 
 	struct ref_list *extra;
 
@@ -665,12 +696,12 @@ int head_ref(each_ref_fn fn, void *cb_data)
 
 int for_each_ref(each_ref_fn fn, void *cb_data)
 {
-	return do_for_each_ref("refs/", fn, 0, 0, cb_data);
+	return do_for_each_ref(NULL, "refs/", fn, 0, 0, cb_data);
 }
 
 int for_each_ref_in(const char *prefix, each_ref_fn fn, void *cb_data)
 {
-	return do_for_each_ref(prefix, fn, strlen(prefix), 0, cb_data);
+	return do_for_each_ref(NULL, prefix, fn, strlen(prefix), 0, cb_data);
 }
 
 int for_each_tag_ref(each_ref_fn fn, void *cb_data)
@@ -690,7 +721,7 @@ int for_each_remote_ref(each_ref_fn fn, void *cb_data)
 
 int for_each_replace_ref(each_ref_fn fn, void *cb_data)
 {
-	return do_for_each_ref("refs/replace/", fn, 13, 0, cb_data);
+	return do_for_each_ref(NULL, "refs/replace/", fn, 13, 0, cb_data);
 }
 
 int for_each_glob_ref_in(each_ref_fn fn, const char *pattern,
@@ -730,7 +761,13 @@ int for_each_glob_ref(each_ref_fn fn, const char *pattern, void *cb_data)
 
 int for_each_rawref(each_ref_fn fn, void *cb_data)
 {
-	return do_for_each_ref("refs/", fn, 0,
+	return do_for_each_ref(NULL, "refs/", fn, 0,
+			       DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
+}
+
+int for_each_rawref_submodule(const char *submodule, each_ref_fn fn, void *cb_data)
+{
+	return do_for_each_ref(submodule, "refs/", fn, 0,
 			       DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
 }
 
@@ -954,7 +991,7 @@ static struct ref_lock *lock_ref_sha1_basic(const char *ref, const unsigned char
 	 * name is a proper prefix of our refname.
 	 */
 	if (missing &&
-	     !is_refname_available(ref, NULL, get_packed_refs(), 0)) {
+	     !is_refname_available(ref, NULL, get_packed_refs(NULL), 0)) {
 		last_errno = ENOTDIR;
 		goto error_return;
 	}
@@ -1017,7 +1054,7 @@ static int repack_without_ref(const char *refname)
 	int fd;
 	int found = 0;
 
-	packed_ref_list = get_packed_refs();
+	packed_ref_list = get_packed_refs(NULL);
 	for (list = packed_ref_list; list; list = list->next) {
 		if (!strcmp(refname, list->name)) {
 			found = 1;
@@ -1106,10 +1143,10 @@ int rename_ref(const char *oldref, const char *newref, const char *logmsg)
 	if (!symref)
 		return error("refname %s not found", oldref);
 
-	if (!is_refname_available(newref, oldref, get_packed_refs(), 0))
+	if (!is_refname_available(newref, oldref, get_packed_refs(NULL), 0))
 		return 1;
 
-	if (!is_refname_available(newref, oldref, get_loose_refs(), 0))
+	if (!is_refname_available(newref, oldref, get_loose_refs(NULL), 0))
 		return 1;
 
 	lock = lock_ref_sha1_basic(renamed_ref, NULL, 0, NULL);
diff --git a/refs.h b/refs.h
index 4a18b08..384e311 100644
--- a/refs.h
+++ b/refs.h
@@ -35,6 +35,8 @@ static inline const char *has_glob_specials(const char *pattern)
 
 /* can be used to learn about broken ref and symref */
 extern int for_each_rawref(each_ref_fn, void *);
+extern int for_each_rawref_submodule(const char *path, each_ref_fn fn, void *cb_data);
+
 
 extern void warn_dangling_symref(FILE *fp, const char *msg_fmt, const char *refname);
 
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [WIP PATCH 2/3] add missing && to submodule-merge testcase
  2010-06-11 12:23 [WIP PATCH 0/3] implement merge strategy for submodule links Heiko Voigt
  2010-06-11 12:23 ` [WIP PATCH 1/3] extend ref iteration for submodules Heiko Voigt
@ 2010-06-11 12:23 ` Heiko Voigt
  2010-06-11 12:23 ` [WIP PATCH 3/3] implement automatic fast forward merge for submodules Heiko Voigt
  2010-06-12 10:12 ` [WIP PATCH 0/3] implement merge strategy for submodule links Johan Herland
  3 siblings, 0 replies; 32+ messages in thread
From: Heiko Voigt @ 2010-06-11 12:23 UTC (permalink / raw)
  To: git

Signed-off-by: Heiko Voigt <hvoigt@hvoigt.net>
---
 t/t7405-submodule-merge.sh |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/t/t7405-submodule-merge.sh b/t/t7405-submodule-merge.sh
index 9a21f78..4a7b893 100755
--- a/t/t7405-submodule-merge.sh
+++ b/t/t7405-submodule-merge.sh
@@ -45,7 +45,7 @@ test_expect_success setup '
 	 git commit -m sub-b) &&
 	git add sub &&
 	test_tick &&
-	git commit -m b
+	git commit -m b &&
 
 	git checkout -b c a &&
 	git merge -s ours b &&
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [WIP PATCH 3/3] implement automatic fast forward merge for submodules
  2010-06-11 12:23 [WIP PATCH 0/3] implement merge strategy for submodule links Heiko Voigt
  2010-06-11 12:23 ` [WIP PATCH 1/3] extend ref iteration for submodules Heiko Voigt
  2010-06-11 12:23 ` [WIP PATCH 2/3] add missing && to submodule-merge testcase Heiko Voigt
@ 2010-06-11 12:23 ` Heiko Voigt
  2010-06-12 10:12 ` [WIP PATCH 0/3] implement merge strategy for submodule links Johan Herland
  3 siblings, 0 replies; 32+ messages in thread
From: Heiko Voigt @ 2010-06-11 12:23 UTC (permalink / raw)
  To: git

This implements a simple merge strategy for submodule hashes. We look
whether a single ref that contains both hashes exist. In case it does
and the changes on both sides point forward in the direction of that
ref we return the refs revision as the merge result.

It is useful for a workflow in which the developers can publish topic
branches in submodules. Once the topic branch has been merged
into a stable branch the developer can simply merge his branch
in the main repository even when other developers have merged their
submodule changes before them.

Signed-off-by: Heiko Voigt <hvoigt@hvoigt.net>
---
 merge-recursive.c          |    9 ++-
 submodule.c                |  121 ++++++++++++++++++++++++++++++++++++++++++++
 submodule.h                |    2 +
 t/t7405-submodule-merge.sh |  115 +++++++++++++++++++++++++++++++++++++++--
 4 files changed, 238 insertions(+), 9 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 206c103..a032a8b 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -20,6 +20,7 @@
 #include "attr.h"
 #include "merge-recursive.h"
 #include "dir.h"
+#include "submodule.h"
 
 static struct tree *shift_tree_object(struct tree *one, struct tree *two,
 				      const char *subtree_shift)
@@ -525,13 +526,15 @@ static void update_file_flags(struct merge_options *o,
 		void *buf;
 		unsigned long size;
 
-		if (S_ISGITLINK(mode))
+		if (S_ISGITLINK(mode)) {
 			/*
 			 * We may later decide to recursively descend into
 			 * the submodule directory and update its index
 			 * and/or work tree, but we do not do that now.
 			 */
+			update_wd = 0;
 			goto update_index;
+		}
 
 		buf = read_sha1_file(sha, &type, &size);
 		if (!buf)
@@ -716,8 +719,8 @@ static struct merge_file_info merge_file(struct merge_options *o,
 			free(result_buf.ptr);
 			result.clean = (merge_status == 0);
 		} else if (S_ISGITLINK(a->mode)) {
-			result.clean = 0;
-			hashcpy(result.sha, a->sha1);
+			result.clean = merge_submodule(result.sha, one->path, one->sha1,
+						       a->sha1, b->sha1);
 		} else if (S_ISLNK(a->mode)) {
 			hashcpy(result.sha, a->sha1);
 
diff --git a/submodule.c b/submodule.c
index 676d48f..5b0313f 100644
--- a/submodule.c
+++ b/submodule.c
@@ -6,6 +6,7 @@
 #include "revision.h"
 #include "run-command.h"
 #include "diffcore.h"
+#include "refs.h"
 
 static int add_submodule_odb(const char *path)
 {
@@ -205,3 +206,123 @@ unsigned is_submodule_modified(const char *path, int ignore_untracked)
 	strbuf_release(&buf);
 	return dirty_submodule;
 }
+
+struct parent_data {
+	struct rev_info common_parents;
+	struct commit *a;
+	struct commit *b;
+};
+
+static int add_common_parents(const char *refname, const unsigned char *sha1,
+			     int flags, void *cb_data)
+{
+	struct parent_data *args = (struct parent_data *) cb_data;
+	struct commit *commit;
+
+	commit = lookup_commit_reference_gently(sha1, 1);
+	if (!commit)
+		return error("branch '%s' does not point at a commit", refname);
+
+	if (!in_merge_bases(args->a, &commit, 1))
+		return 0;
+	if (!in_merge_bases(args->b, &commit, 1))
+		return 0;
+
+	add_pending_object(&args->common_parents,
+			(struct object *)commit, refname);
+
+	return 0;
+}
+
+static int find_common_parent(unsigned char result[20], const char *path,
+			      struct commit *a, struct commit *b)
+{
+	struct parent_data parent_args;
+	struct commit *commit;
+	unsigned char sha1[20];
+	int i, ret = 0;
+	static const char *format = " %h: %m %s";
+	struct strbuf sb = STRBUF_INIT;
+
+	/* search for parent revs of a and b */
+	init_revisions(&parent_args.common_parents, NULL);
+	parent_args.common_parents.no_walk = 1;
+	parent_args.a = a;
+	parent_args.b = b;
+	for_each_rawref_submodule(path, add_common_parents, &parent_args);
+
+	if (prepare_revision_walk(&parent_args.common_parents))
+		return 0;
+
+	i = 0;
+	while ((commit = get_revision(&parent_args.common_parents))) {
+		struct pretty_print_context ctx = {0};
+		ctx.date_mode = parent_args.common_parents.date_mode;
+		format_commit_message(commit, format, &sb, &ctx);
+		strbuf_addstr(&sb, "\n");
+		if (i == 0)
+			hashcpy(sha1, commit->object.sha1);
+		i++;
+	}
+
+	/* we found exactly one revision */
+	if (i == 1) {
+		hashcpy(result, sha1);
+		ret = 1;
+		goto finish;
+	}
+
+	warning("Found multiple possible merge resolutions for submodule '%s':", path);
+	fprintf(stderr, "%s", sb.buf);
+
+finish:
+	strbuf_release(&sb);
+	return ret;
+}
+
+int merge_submodule(unsigned char result[20], const char *path, const unsigned char base[20],
+		    const unsigned char a[20], const unsigned char b[20])
+{
+	struct commit *commit_base, *commit_a, *commit_b;
+	int parent_exists;
+
+	/* store a in result in case we fail */
+	hashcpy(result, a);
+
+	/* we can not handle deletion conflicts */
+	if (is_null_sha1(base))
+		return 0;
+	if (is_null_sha1(a))
+		return 0;
+	if (is_null_sha1(b))
+		return 0;
+
+	if (add_submodule_odb(path)) {
+		warning("Failed to merge submodule %s (not checked out)", path);
+		return 0;
+	}
+
+	if (!(commit_base = lookup_commit_reference(base)) ||
+	    !(commit_a = lookup_commit_reference(a)) ||
+	    !(commit_b = lookup_commit_reference(b)))
+	{
+		warning("Failed to merge submodule %s (commits not present)", path);
+		return 0;
+	}
+
+	/* are both changes forward */
+	if (!in_merge_bases(commit_base, &commit_a, 1) ||
+	    !in_merge_bases(commit_base, &commit_b, 1))
+	{
+		warning("Submodule rewound can not merge");
+		return 0;
+	}
+
+	/* find commit which merges them */
+	parent_exists = find_common_parent(result, path, commit_a, commit_b);
+	if (!parent_exists) {
+		warning("Failed to merge submodule %s (merge not found)", path);
+		return 0;
+	}
+	return 1;
+}
diff --git a/submodule.h b/submodule.h
index dbda270..b75a704 100644
--- a/submodule.h
+++ b/submodule.h
@@ -6,5 +6,7 @@ void show_submodule_summary(FILE *f, const char *path,
 		unsigned dirty_submodule,
 		const char *del, const char *add, const char *reset);
 unsigned is_submodule_modified(const char *path, int ignore_untracked);
+int merge_submodule(unsigned char result[20], const char *path, const unsigned char base[20],
+		    const unsigned char a[20], const unsigned char b[20]);
 
 #endif
diff --git a/t/t7405-submodule-merge.sh b/t/t7405-submodule-merge.sh
index 4a7b893..04dc371 100755
--- a/t/t7405-submodule-merge.sh
+++ b/t/t7405-submodule-merge.sh
@@ -54,13 +54,116 @@ test_expect_success setup '
 	git merge -s ours a
 '
 
-test_expect_success 'merging with modify/modify conflict' '
+# History setup
+#
+#      b
+#    /   \
+#   a     d
+#    \   /
+#      c
+#
+# a in the main repository records to sub-a in the submodule and
+# analogous b and c. d should be automatically found by merging c into
+# b in the main repository.
+test_expect_success 'setup for merge search' '
+	mkdir merge-search &&
+	cd merge-search &&
+	git init &&
+	mkdir sub &&
+	(cd sub &&
+	 git init &&
+	 echo "file-a" > file-a &&
+	 git add file-a &&
+	 git commit -m "sub-a" &&
+	 git checkout -b sub-a) &&
+	git add sub &&
+	git commit -m "a" &&
+	git checkout -b a &&
+
+	git checkout -b b &&
+	(cd sub &&
+	 git checkout -b sub-b &&
+	 echo "file-b" > file-b &&
+	 git add file-b &&
+	 git commit -m "sub-b") &&
+	git commit -a -m "b" &&
+
+	git checkout -b c a &&
+	(cd sub &&
+	 git checkout -b sub-c sub-a &&
+	 echo "file-c" > file-c &&
+	 git add file-c &&
+	 git commit -m "sub-c") &&
+	git commit -a -m "c" &&
+
+	(cd sub &&
+	 git checkout -b sub-d sub-b &&
+	 git merge sub-c &&
+	 git checkout sub-b) &&
+	git checkout -b test b &&
+	cd ..
+'
+
+test_expect_success 'merging with common parent search' '
+	cd merge-search &&
+	git checkout -b test-parent b &&
+	git merge c &&
+	git ls-tree test-parent | grep sub | cut -f1 | cut -f3 -d" " > actual &&
+	(cd sub &&
+	 git rev-parse sub-d > ../expect) &&
+	test_cmp actual expect &&
+	cd ..
+'
+
+test_expect_success 'merging should fail for ambigous common parent' '
+	cd merge-search &&
+	git checkout -b test-ambigous b &&
+	(cd sub &&
+	 git checkout -b ambigous sub-d &&
+	 echo "ambigous-file" > ambigous-file &&
+	 git add ambigous-file &&
+	 git commit -m "ambigous") &&
+	test_must_fail git merge c &&
+	git reset --hard &&
+	cd ..
+'
+
+# in a situation like this
+#
+# submodule tree:
+#
+#    sub-a --- sub-b --- sub-d
+#
+# main tree:
+#
+#    e (sub-a)
+#   /
+#  d (sub-b)
+#   \
+#    f (sub-d)
+#
+# A merge should fail because one change points backwards.
+
+test_expect_success 'merging should fail for changes that are backwards' '
+	cd merge-search &&
+	git checkout -b d a &&
+	(cd sub &&
+	 git checkout sub-b) &&
+	git commit -a -m "d" &&
+
+	git checkout -b e d &&
+	(cd sub &&
+	 git checkout sub-a) &&
+	git commit -a -m "e" &&
+
+	git checkout -b f d &&
+	(cd sub &&
+	 git checkout sub-d) &&
+	git commit -a -m "f" &&
 
-	git checkout -b test1 a &&
-	test_must_fail git merge b &&
-	test -f .git/MERGE_MSG &&
-	git diff &&
-	test -n "$(git ls-files -u)"
+	git checkout -b test-backward e &&
+	test_must_fail git merge f &&
+	cd ..
 '
 
 test_expect_success 'merging with a modify/modify conflict between merge bases' '
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-11 12:23 [WIP PATCH 0/3] implement merge strategy for submodule links Heiko Voigt
                   ` (2 preceding siblings ...)
  2010-06-11 12:23 ` [WIP PATCH 3/3] implement automatic fast forward merge for submodules Heiko Voigt
@ 2010-06-12 10:12 ` Johan Herland
  2010-06-12 12:06   ` Heiko Voigt
  3 siblings, 1 reply; 32+ messages in thread
From: Johan Herland @ 2010-06-12 10:12 UTC (permalink / raw)
  To: Heiko Voigt; +Cc: git

On Friday 11 June 2010, Heiko Voigt wrote:
> The following patch series is a work in progress. The idea is whenever
> you need to merge two SHA1's of a submodule we search for a ref in the
> submodule which already contains both. If one such ref exists the
> resulting SHA1 is the one pointed at by that ref.

I appreciate the effort to improve submodule handling, but I'm not sure I 
like this approach. Even though you try to apply it as conservatively as 
possible, it still smells a little like trying to make Git too clever for 
its own good.

E.g. say we have the following commit history in the submodule:

  A---B---C---D  <-- master

Now, say that your merge conflict comes from one branch updating the 
submodule from B to C, while the other branch reverts the submodule from B 
to A. In your proposed scheme, Git would auto-resolve the conflict to D.

In this case Git has no way of knowing whether the update from B to C is 
"better" than the revert from B to A. Maybe the revert to A happened because 
there is a showstopper bug in B that has not yet been fixed, and the best 
solution is to revert to A until a fix can be made. Or maybe C fixes that 
showstopper bug, so C is safe after all.

In any case, fast-forwarding to D seems irresponsible, since we have no 
concept of how well D is tested. Maybe it introduces another showstopper 
bug, and that is why neither branch has upgraded to it yet?

This whole idea is somewhat similar to branch-tracking submodules (recently 
discussed in another thread), except that it only applies on _merge_ in the 
superproject, and you don't get to choose _which_ branch it's tracking. 
That's _way_ too arbitrary for my tastes.

> The implementation currently searches through all refs and if one (and
> only one) ref exists which contains both sides it merges. In all other
> cases it fails.

Still doesn't solve the fundamental A---B---C---D problem I demonstrated 
above.

> Future Plans:
> 
>   * Only search stable branches. E.g. by default only master and
>     */master. The stable branch list will be configurable.

What is this "stable" branch of which you speak? "Stable" is a very relative 
concept, depending on which repo you're working in, and which branch you're 
working on. In any case, master is often not the most stable branch in a 
given repo. In git.git for example, maint is more stable than master. Also, 
I have many repos where master should not be considered "stable" at all...

...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-12 10:12 ` [WIP PATCH 0/3] implement merge strategy for submodule links Johan Herland
@ 2010-06-12 12:06   ` Heiko Voigt
  2010-06-13 17:59     ` Johan Herland
  0 siblings, 1 reply; 32+ messages in thread
From: Heiko Voigt @ 2010-06-12 12:06 UTC (permalink / raw)
  To: Johan Herland; +Cc: git

Hi,

On Sat, Jun 12, 2010 at 12:12:50PM +0200, Johan Herland wrote:
> On Friday 11 June 2010, Heiko Voigt wrote:
> > The following patch series is a work in progress. The idea is whenever
> > you need to merge two SHA1's of a submodule we search for a ref in the
> > submodule which already contains both. If one such ref exists the
> > resulting SHA1 is the one pointed at by that ref.
> 
> I appreciate the effort to improve submodule handling, but I'm not sure I 
> like this approach. Even though you try to apply it as conservatively as 
> possible, it still smells a little like trying to make Git too clever for 
> its own good.
> 
> E.g. say we have the following commit history in the submodule:
> 
>   A---B---C---D  <-- master
> 
> Now, say that your merge conflict comes from one branch updating the 
> submodule from B to C, while the other branch reverts the submodule from B 
> to A. In your proposed scheme, Git would auto-resolve the conflict to D.

You are right. I did forget to mention this in my topic letter: Both
changes need to point forward. This exact case is also tested in the
testcases and results in a merge conflict which needs to be resolved by
hand.

> This whole idea is somewhat similar to branch-tracking submodules (recently 
> discussed in another thread), except that it only applies on _merge_ in the 
> superproject, and you don't get to choose _which_ branch it's tracking. 
> That's _way_ too arbitrary for my tastes.

The difference to branch-tracking submodules is, if I understand it
correctly, that with a merge you get an explicit SHA1 which is recorded.
Whereras with branch-tracking you never know on which revision on the
tracked branch the submodule was.

Thats why I only want to search through stable branches further down. I
mean stable in the git sense that they never get rewound and of course
should contain the most stable part of development. To ease the
configuration we would default to master which we could assume as
stable. But if we want to be on the safe side we could also say that
automatic submodule merging only works when the user has configured some
stable branches.

> > Future Plans:
> > 
> >   * Only search stable branches. E.g. by default only master and
> >     */master. The stable branch list will be configurable.
> 
> What is this "stable" branch of which you speak? "Stable" is a very relative 
> concept, depending on which repo you're working in, and which branch you're 
> working on. In any case, master is often not the most stable branch in a 
> given repo. In git.git for example, maint is more stable than master. Also, 
> I have many repos where master should not be considered "stable" at all...

See above.

cheers Heiko

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-12 12:06   ` Heiko Voigt
@ 2010-06-13 17:59     ` Johan Herland
  2010-06-14 17:02       ` Heiko Voigt
  0 siblings, 1 reply; 32+ messages in thread
From: Johan Herland @ 2010-06-13 17:59 UTC (permalink / raw)
  To: Heiko Voigt; +Cc: git

On Saturday 12 June 2010, Heiko Voigt wrote:
> On Sat, Jun 12, 2010 at 12:12:50PM +0200, Johan Herland wrote:
> > On Friday 11 June 2010, Heiko Voigt wrote:
> > > The following patch series is a work in progress. The idea is
> > > whenever you need to merge two SHA1's of a submodule we search for a
> > > ref in the submodule which already contains both. If one such ref
> > > exists the resulting SHA1 is the one pointed at by that ref.
> > 
> > I appreciate the effort to improve submodule handling, but I'm not sure
> > I like this approach. Even though you try to apply it as
> > conservatively as possible, it still smells a little like trying to
> > make Git too clever for its own good.
> > 
> > E.g. say we have the following commit history in the submodule:
> >   A---B---C---D  <-- master
> > 
> > Now, say that your merge conflict comes from one branch updating the
> > submodule from B to C, while the other branch reverts the submodule
> > from B to A. In your proposed scheme, Git would auto-resolve the
> > conflict to D.
> 
> You are right. I did forget to mention this in my topic letter: Both
> changes need to point forward. This exact case is also tested in the
> testcases and results in a merge conflict which needs to be resolved by
> hand.

Still doesn't solve one of the cases I gave in the last email: Say one 
branch updates the submodule from A to B, and the other updates from A to C. 
Your proposal resolves the merge by fast-forwarding to D, which seems 
irresponsible, since we have no concept of how well D is tested. Maybe it 
introduces another showstopper bug, and that is why neither branch has 
upgraded to it yet?

A better solution would be, to put it generally: Given a submodule being 
part of a superproject conflict, if one of the candidate submodule SHA1s is 
is a descendant of _all_ the other submodule SHA1 candidates, then choose 
that SHA1 as the proposed resolution (but please leave the index entry 
"unmerged", so that the resolution must be confirmed by the user).

This removes all the "stable" branch magic from your patch. All you need to 
look at are the candidate SHA1s and their relationship in the commit graph. 
No refs involved.

In the A->B vs. A->C case above, we would see that C is a descendant of B, 
and we would therefore choose C as a suggested conflict resolution, which 
IMHO is a much better choice than D.

I still don't want to add a lot of auto-resolving cleverness to Git, as it 
inevitably _will_ choose incorrectly sometimes, and in those situations it 
will be much more confusing than if it didn't choose at all.

> > This whole idea is somewhat similar to branch-tracking submodules
> > (recently discussed in another thread), except that it only applies on
> > _merge_ in the superproject, and you don't get to choose _which_
> > branch it's tracking. That's _way_ too arbitrary for my tastes.
> 
> The difference to branch-tracking submodules is, if I understand it
> correctly, that with a merge you get an explicit SHA1 which is recorded.
> Whereras with branch-tracking you never know on which revision on the
> tracked branch the submodule was.

Technically you may be right, but my point is that in your original proposal 
I don't get to _choose_ which submodule SHA1 is explicitly recorded for the 
merge resolution, but instead your patch chooses whatever SHA1 happens to be 
at the tip of some branch considered "stable". Although technically 
different, this is similar in _spirit_ to what branch-tracking submodules is 
about.

> Thats why I only want to search through stable branches further down. I
> mean stable in the git sense that they never get rewound and of course
> should contain the most stable part of development. To ease the
> configuration we would default to master which we could assume as
> stable. But if we want to be on the safe side we could also say that
> automatic submodule merging only works when the user has configured some
> stable branches.

Ok, so you can configure exactly which branch(es) you consider stable. I'd 
still much rather prefer the approach I outlined above, which does away with 
all the "stable" branch magic, and only considers the commit ancestry 
directly.

Hope this helps,

...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-13 17:59     ` Johan Herland
@ 2010-06-14 17:02       ` Heiko Voigt
  2010-06-14 23:59         ` Johan Herland
  0 siblings, 1 reply; 32+ messages in thread
From: Heiko Voigt @ 2010-06-14 17:02 UTC (permalink / raw)
  To: Johan Herland; +Cc: git

Hi,

On Sun, Jun 13, 2010 at 07:59:43PM +0200, Johan Herland wrote:
> On Saturday 12 June 2010, Heiko Voigt wrote:
> > On Sat, Jun 12, 2010 at 12:12:50PM +0200, Johan Herland wrote:
> > > E.g. say we have the following commit history in the submodule:
> > >   A---B---C---D  <-- master
> > > 
> > > Now, say that your merge conflict comes from one branch updating the
> > > submodule from B to C, while the other branch reverts the submodule
> > > from B to A. In your proposed scheme, Git would auto-resolve the
> > > conflict to D.
> > 
> > You are right. I did forget to mention this in my topic letter: Both
> > changes need to point forward. This exact case is also tested in the
> > testcases and results in a merge conflict which needs to be resolved by
> > hand.
> 
> Still doesn't solve one of the cases I gave in the last email: Say one 
> branch updates the submodule from A to B, and the other updates from A to C. 
> Your proposal resolves the merge by fast-forwarding to D, which seems 
> irresponsible, since we have no concept of how well D is tested. Maybe it 
> introduces another showstopper bug, and that is why neither branch has 
> upgraded to it yet?
> 
> A better solution would be, to put it generally: Given a submodule being 
> part of a superproject conflict, if one of the candidate submodule SHA1s is 
> is a descendant of _all_ the other submodule SHA1 candidates, then choose 
> that SHA1 as the proposed resolution (but please leave the index entry 
> "unmerged", so that the resolution must be confirmed by the user).

Is there currently any logic to support a "suggested" merge resolution
in git.git? I am not that familiar with code base yet and I do not think
that I have seen something like that. Is it done somewhere already?

> This removes all the "stable" branch magic from your patch. All you need to 
> look at are the candidate SHA1s and their relationship in the commit graph. 
> No refs involved.
> 
> In the A->B vs. A->C case above, we would see that C is a descendant of B, 
> and we would therefore choose C as a suggested conflict resolution, which 
> IMHO is a much better choice than D.
> 
> I still don't want to add a lot of auto-resolving cleverness to Git, as it 
> inevitably _will_ choose incorrectly sometimes, and in those situations it 
> will be much more confusing than if it didn't choose at all.

I see your point. But nevertheless there is a specific workflow I target
to support which is not supported by your approach:

Lets assume Alice creates a feature branch feature_a for her development
and needs to modify the submodule and creates a branch there as well. At
the same time Bob develops feature_b and also needs changes in the
submodule and so he creates a feature branch there as well.

Assume we now have the following history in the submodule:

  B---C---D         [feature_a]
 /         \
A---E---F---G---K   [master]
     \         /
      H---I---J     [feature_b]

Now during the development of her branch Alice would link D in the
superproject as it is the tip of her branch. Bob would do the same and
link to J as his tip. Now Alice sends out her branch to the reviewers
and after everybody is happy with it the maintainer merges her branch
first. The superproject links to D. Now Bob does the same and the
maintainer wants to merge his branch and gets a merge conflict because D
and J do not have a parent/children relationship.

I think this is a fairly natural pattern which evolves from the use of
feature branches in git. So I would like to make git behave naturally
for this workflow and automatically merge.

Now your point is that master could be wrong and you are right, but
normal merges can go wrong in a similar way. Just imagine this:

Alice adds a parameter to the static function somefunc() and changes all
callsites of it in her branch. Independently Bob writes new code in
his branch that uses somefunc() with the old signature. When both
branches are merged git has no chance of doing it right and the code
will not compile. So even normal merging is always a little heuristic.
Question is: How well does the heuristic perform in practise.

> > Thats why I only want to search through stable branches further down. I
> > mean stable in the git sense that they never get rewound and of course
> > should contain the most stable part of development. To ease the
> > configuration we would default to master which we could assume as
> > stable. But if we want to be on the safe side we could also say that
> > automatic submodule merging only works when the user has configured some
> > stable branches.
> 
> Ok, so you can configure exactly which branch(es) you consider stable. I'd 
> still much rather prefer the approach I outlined above, which does away with 
> all the "stable" branch magic, and only considers the commit ancestry 
> directly.

Ok what do you think about combining both approaches: If no stable
branches are configured we default to your strategy and if the user
wants some magic (I mean isn't that what git is all about: magic)
configuring stable branches will enable git to resolve conflicts like
the ones I described above.

My feeling is that in practise automatic merging into stable branches
will work well and the cases of failure will be neglectable to not
happening at all. So my approach would be to go ahead, implement the
strategy and let people play around with it so we can collect some real
life data whether it is helping or making matters worse.

cheers Heiko

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-14 17:02       ` Heiko Voigt
@ 2010-06-14 23:59         ` Johan Herland
  2010-06-15 17:37           ` Jens Lehmann
  0 siblings, 1 reply; 32+ messages in thread
From: Johan Herland @ 2010-06-14 23:59 UTC (permalink / raw)
  To: Heiko Voigt; +Cc: git

On Monday 14 June 2010, Heiko Voigt wrote:
> On Sun, Jun 13, 2010 at 07:59:43PM +0200, Johan Herland wrote:
> > On Saturday 12 June 2010, Heiko Voigt wrote:
> > A better solution would be, to put it generally: Given a submodule
> > being part of a superproject conflict, if one of the candidate
> > submodule SHA1s is is a descendant of _all_ the other submodule SHA1
> > candidates, then choose that SHA1 as the proposed resolution (but
> > please leave the index entry "unmerged", so that the resolution must
> > be confirmed by the user).
> 
> Is there currently any logic to support a "suggested" merge resolution
> in git.git? I am not that familiar with code base yet and I do not think
> that I have seen something like that. Is it done somewhere already?

In the case of a regular file, you can replace the default working tree 
version (the one with conflict markers) by a version containing your 
suggested merge resolution, but NOT add that version to the index, so the 
index still thinks the file is unmerged. You then 'git add' the file to 
acknowledge the suggested resolution.

In the case of submodules, you would check out the suggested version of the 
submodule, but not add its SHA1 to the superproject index. Again, the user 
acknowledges the suggested resolution by 'git add'ing the submodule.

My point is that when Git tries to suggest merge resolutions, it should 
purposefully NOT add these to the index, so that the user HAS to acknowledge 
them. This is similar to the default behaviour of 'git rerere' which 
resolves your conflicts automatically, but does not touch the corresponding 
"unmerged" index entries, so that you manually have to 'git add' the result.

> > This removes all the "stable" branch magic from your patch. All you
> > need to look at are the candidate SHA1s and their relationship in the
> > commit graph. No refs involved.
> > 
> > In the A->B vs. A->C case above, we would see that C is a descendant of
> > B, and we would therefore choose C as a suggested conflict resolution,
> > which IMHO is a much better choice than D.
> > 
> > I still don't want to add a lot of auto-resolving cleverness to Git, as
> > it inevitably _will_ choose incorrectly sometimes, and in those
> > situations it will be much more confusing than if it didn't choose at
> > all.
> 
> I see your point. But nevertheless there is a specific workflow I target
> to support which is not supported by your approach:
> 
> Lets assume Alice creates a feature branch feature_a for her development
> and needs to modify the submodule and creates a branch there as well. At
> the same time Bob develops feature_b and also needs changes in the
> submodule and so he creates a feature branch there as well.
> 
> Assume we now have the following history in the submodule:
> 
>   B---C---D         [feature_a]
>  /         \
> A---E---F---G---K   [master]
>      \         /
>       H---I---J     [feature_b]
> 
> Now during the development of her branch Alice would link D in the
> superproject as it is the tip of her branch. Bob would do the same and
> link to J as his tip. Now Alice sends out her branch to the reviewers
> and after everybody is happy with it the maintainer merges her branch
> first. The superproject links to D.

No. The superproject would get a conflict between the A->D and A->F updates 
of the submodule. The correct resolution would be to go into the submodule, 
do the merge to produce G, and then record this as the correct merge 
resolution in the superproject.

You want Git to do this automatically for you, whereas I think that Git 
should not be that "clever", because there are situations (as I've 
demonstrated previously in this thread) where the "cleverness" would do The 
Wrong Thing.

> Now Bob does the same and the
> maintainer wants to merge his branch and gets a merge conflict because D
> and J do not have a parent/children relationship.

Well, s/D/G/, but your point still stands. And the correct resolution is, of 
course, to merge G and J to produce K, and then record K in the superproject 
as the correct merge resolution.

Again, the question is whether Git should do these submodule merges 
automatically, or not.

> I think this is a fairly natural pattern which evolves from the use of
> feature branches in git. So I would like to make git behave naturally
> for this workflow and automatically merge.

Please keep in mind that your workflow is but one, and Git has to support a 
wide variety of different workflows without breaking down.

It actually seems to me that - in your workflow scenario, where the 
submodule seems to be fairly tightly coupled to the superproject - you would 
be better off using branch-tracking submodules (recently discussed in the 
thread called 'RFC: Making submodules "track" branches').

When using branch-tracking submodules, Alice would configure the submodule 
to track the "feature_a" branch, while Bob would configure the submodule to 
track the "feature_b" branch. When merging these branches, the correct merge 
resolution would be (after having merged the submodule feature branches back 
into the submodule "master") to track the "master" branch in the submodule.

When merging the two feature branches back into "master" there would (in 
addition to the conflicted submodule entry) be conflicts in the .gitmodules 
file on which submodule branch to track (I'm following Ævar's proposal 
here), and the resolution of _that_ conflict would specify which submodule 
branch/version to use in the resolved merge.

Now, in your proposal, you would have an _additional_ config variable for 
controlling which submodule branch is equivalent to "master" in the above 
example. If this branch were to be different from the "tracked" branch (as 
defined in Ævar's proposal), you would be in the deeply confusing situation 
of having the merge resolved to the tip of one branch, while you've told the 
submodule to track a _different_ branch. The only thing that makes sense 
would be for these two variables to always be identical, at which point you 
should simply eliminate one of them.

I guess what I'm getting at (sorry for taking a while to get here) is that 
you could maybe solve your problem by a combination of what I suggested in 
my previous mail, plus the use of branch-tracking submodules. There are 
still some things to be worked out here, but I don't believe adding an 
almost-but-not-quite-submodule-branch-tracking option is the best way to go.

> Now your point is that master could be wrong and you are right, but
> normal merges can go wrong in a similar way. Just imagine this:
> 
> Alice adds a parameter to the static function somefunc() and changes all
> callsites of it in her branch. Independently Bob writes new code in
> his branch that uses somefunc() with the old signature. When both
> branches are merged git has no chance of doing it right and the code
> will not compile. So even normal merging is always a little heuristic.
> Question is: How well does the heuristic perform in practise.

True, but I don't necessarily accept that one sometimes-wrong heuristic 
justifies another sometimes-wrong heuristic. Follow that logic, and we can 
pile on heuristics until we almost always get something wrong...

> > Ok, so you can configure exactly which branch(es) you consider stable.
> > I'd still much rather prefer the approach I outlined above, which does
> > away with all the "stable" branch magic, and only considers the commit
> > ancestry directly.
> 
> Ok what do you think about combining both approaches: If no stable
> branches are configured we default to your strategy and if the user
> wants some magic (I mean isn't that what git is all about: magic)
> configuring stable branches will enable git to resolve conflicts like
> the ones I described above.

FWIW, IMHO Git is NOT about magic at all. It even says so at the top of the 
git(1) manual page: "git - the stupid content tracker". And both Junio and 
Linus have repeatedly argued how Git purposefully only auto-resolves the 
_simple_ cases, and leaves the _hard_ cases to the user, since trying to be 
clever about the hard cases inevitably leads to more confusion and insanity.

In this case, your scenario/proposal just about crosses into what I consider 
the _hard_ space, which is why I'm critical of the cleverness.

As for combining both approaches (subject to some config option), I guess 
that could work, but I'd certainly like to see a significant amount of 
support for your proposal before we go there.

> My feeling is that in practise automatic merging into stable branches
> will work well and the cases of failure will be neglectable to not
> happening at all. So my approach would be to go ahead, implement the
> strategy and let people play around with it so we can collect some real
> life data whether it is helping or making matters worse.

Feel free to post the patches, if you can spend the time making them. So 
far, there's been no other feedback in this thread, so maybe I'm alone in my 
worries...

Cheers,

...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-14 23:59         ` Johan Herland
@ 2010-06-15 17:37           ` Jens Lehmann
  2010-06-16  0:05             ` Johan Herland
  0 siblings, 1 reply; 32+ messages in thread
From: Jens Lehmann @ 2010-06-15 17:37 UTC (permalink / raw)
  To: Johan Herland; +Cc: Heiko Voigt, git

Am 15.06.2010 01:59, schrieb Johan Herland:
> My point is that when Git tries to suggest merge resolutions, it should 
> purposefully NOT add these to the index, so that the user HAS to acknowledge 
> them. This is similar to the default behaviour of 'git rerere' which 
> resolves your conflicts automatically, but does not touch the corresponding 
> "unmerged" index entries, so that you manually have to 'git add' the result.

I like that idea, as it avoids having unintended submodule commits added
silently to the superprojects index by the merge.


>> Lets assume Alice creates a feature branch feature_a for her development
>> and needs to modify the submodule and creates a branch there as well. At
>> the same time Bob develops feature_b and also needs changes in the
>> submodule and so he creates a feature branch there as well.
>>
>> Assume we now have the following history in the submodule:
>>
>>   B---C---D         [feature_a]
>>  /         \
>> A---E---F---G---K   [master]
>>      \         /
>>       H---I---J     [feature_b]
>>
>> Now during the development of her branch Alice would link D in the
>> superproject as it is the tip of her branch. Bob would do the same and
>> link to J as his tip. Now Alice sends out her branch to the reviewers
>> and after everybody is happy with it the maintainer merges her branch
>> first. The superproject links to D.
> 
> No. The superproject would get a conflict between the A->D and A->F updates 
> of the submodule. The correct resolution would be to go into the submodule, 
> do the merge to produce G, and then record this as the correct merge 
> resolution in the superproject.

But as far as I understood this patch this merge has already been done
inside the submodule (at least this is what the setup of the test case
seems to do at a quick glance).


> You want Git to do this automatically for you, whereas I think that Git 
> should not be that "clever", because there are situations (as I've 
> demonstrated previously in this thread) where the "cleverness" would do The 
> Wrong Thing.
> 
>> Now Bob does the same and the
>> maintainer wants to merge his branch and gets a merge conflict because D
>> and J do not have a parent/children relationship.
> 
> Well, s/D/G/, but your point still stands. And the correct resolution is, of 
> course, to merge G and J to produce K, and then record K in the superproject 
> as the correct merge resolution.
> 
> Again, the question is whether Git should do these submodule merges 
> automatically, or not.

Hm, maybe I am missing something here, but isn't the question whether Git
should /use/ these submodule merges already done by a human being instead
of /doing them itself/? So isn't it just about making Git so clever it
proposes a merge already present in the submodule for recording in the
superproject when merging there?


> Feel free to post the patches, if you can spend the time making them. So 
> far, there's been no other feedback in this thread, so maybe I'm alone in my 
> worries...

I fully understand your worries concerning automagic merges inside a
submodule. But I really would like to see Git assisting me when merging
submodule commits in the superproject that have already been merged in
the submodule repo. And for me the first commit containing the others
is the one I would like to see then.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-15 17:37           ` Jens Lehmann
@ 2010-06-16  0:05             ` Johan Herland
  2010-06-16 17:16               ` Jens Lehmann
  0 siblings, 1 reply; 32+ messages in thread
From: Johan Herland @ 2010-06-16  0:05 UTC (permalink / raw)
  To: Jens Lehmann; +Cc: Heiko Voigt, git

On Tuesday 15 June 2010, Jens Lehmann wrote:
> Am 15.06.2010 01:59, schrieb Johan Herland:
> >> Lets assume Alice creates a feature branch feature_a for her
> >> development and needs to modify the submodule and creates a branch
> >> there as well. At the same time Bob develops feature_b and also needs
> >> changes in the submodule and so he creates a feature branch there as
> >> well.
> >> 
> >> Assume we now have the following history in the submodule:
> >>   B---C---D         [feature_a]
> >>  /         \
> >> A---E---F---G---K   [master]
> >>      \         /
> >>       H---I---J     [feature_b]
> >> 
> >> Now during the development of her branch Alice would link D in the
> >> superproject as it is the tip of her branch. Bob would do the same and
> >> link to J as his tip. Now Alice sends out her branch to the reviewers
> >> and after everybody is happy with it the maintainer merges her branch
> >> first. The superproject links to D.
> > 
> > No. The superproject would get a conflict between the A->D and A->F
> > updates of the submodule. The correct resolution would be to go into
> > the submodule, do the merge to produce G, and then record this as the
> > correct merge resolution in the superproject.
> 
> But as far as I understood this patch this merge has already been done
> inside the submodule (at least this is what the setup of the test case
> seems to do at a quick glance).

Ok, let's look at a sequence of events:

0. There is a master branch in the superproject which points to commit A (on 
the master branch) in the submodule.

1. Alice creates feature_a branch in both superproject and submodule, 
creates commits B, C & D in the submodule and updates the superproject to 
point to D

2. Someone creates E in the submodule, and updates the master branch in the 
superproject to point to E.

3. Bob creates feature_b branch in both superproject and submodule, creates 
commits H, I & J in the submodule and updates the superproject to point to J

4. Someone creates F in the submodule, and updates the master branch in the 
superproject to point to F.

5. Maintainer starts integrating feature_a into master in superproject, and 
discovers the conflict between A->D and A->F. Mantainer then descends into 
submodule to create the merge G. Maintainer can now 'git add' the submodule 
in the superproject to record A->G as the merge resolution of the A->D vs. 
A->F conflict.

(6. Same as step #5, but replace Alice/A/D/F/G with Bob/E/J/G/K)

I assume here that nobody has made the merge commit G before Git produces 
the A->D vs. A->F conflict in step #5 (which prompts Maintainer to make G in 
order to resolve the conflict). I believe this would be the most common 
case.

If the merge commit G for some reason _already_ exists in the submodule 
before step #5, the maintainer's job is to simply recognize it as the 
correct resolution of the conflict, and check it out (and finally 'git add' 
it to the superproject index). But I don't see this happening very often: 
For one, who has the incentive to create G before it is needed in step #5? 
Both Alice and Bob are content with pointing to their respective submodule 
branches, and the only person who cares about doing the submodule merges is 
Maintainer who has to tie everything together into a coherent whole.

However, we should not require clairvoyance from the Maintainer as to which 
submodules have been modified by each feature branch, and hence which of 
them require preparatory submodule merges to be performed before the main 
superproject merge can be started. To the contrary, I believe the typical 
Maintainer will start the superproject merge, and then respond to the 
submodule conflicts that Git produces by descending into the submodule and 
merging submodules (or whatever else is required to reach a satisfactory 
submodule state).

Thus, if the purpose of Heiko's patches is to simply recognize merges that 
have already happened before step #5, then I'm afraid they will seldom or 
never be useful in practice (since these merges typically happen _after_ the 
superproject merge has been started).

> > You want Git to do this automatically for you, whereas I think that Git
> > should not be that "clever", because there are situations (as I've
> > demonstrated previously in this thread) where the "cleverness" would do
> > The Wrong Thing.
> > 
> >> Now Bob does the same and the
> >> maintainer wants to merge his branch and gets a merge conflict because
> >> D and J do not have a parent/children relationship.
> > 
> > Well, s/D/G/, but your point still stands. And the correct resolution
> > is, of course, to merge G and J to produce K, and then record K in the
> > superproject as the correct merge resolution.
> > 
> > Again, the question is whether Git should do these submodule merges
> > automatically, or not.
> 
> Hm, maybe I am missing something here, but isn't the question whether Git
> should /use/ these submodule merges already done by a human being instead
> of /doing them itself/? So isn't it just about making Git so clever it
> proposes a merge already present in the submodule for recording in the
> superproject when merging there?

Ah, yes, sorry, I confused the concepts at this point. Still:

- If the purpose is to re-use existing submodule merges then I'm afraid (as 
I've argued above) that this would happen too seldom to be useful in 
practice (and even then you would already have had to set up the appropriate 
config for your branch, to enable Git to find this pre-existing merge at 
all).

- If the purpose is to create new submodule merges to resolve the conflicts 
(which, granted, the patches currently don't do, but that I'm afraid they 
would _have_ to do in order to be useful in practice), then there is too 
much cleverness/magic for my liking.

> > Feel free to post the patches, if you can spend the time making them.
> > So far, there's been no other feedback in this thread, so maybe I'm
> > alone in my worries...
> 
> I fully understand your worries concerning automagic merges inside a
> submodule. But I really would like to see Git assisting me when merging
> submodule commits in the superproject that have already been merged in
> the submodule repo.

As I've argued above, I'm afraid this situation would seldom/never arise in 
practice.

Taking a step back and comparing the merging of submodules vs. the merging 
of regular files:

Git's rules are simple and straightforward for regular files: If both 
sides/branches have changed the same area of code (and the changes don't 
exactly coincide), you get a conflict. There's no magic/cleverness applied 
to try to figure out what a good resolution would look like; it's a 
conflict, and the user must resolve it. Simple as that.

I'd argue that the submodule case should be the same: If both sides/branches 
change the submodule (and the SHA1s don't exactly match), you get a 
conflict, and it's up to the user to resolve it.

We may to make an exception for the case where one SHA1 is a descendant of 
the other (i.e. a fast-forward situation), since that seems like a safe 
choice in most situations, but I don't feel safe doing much beyond that.

> And for me the first commit containing the others is the one I would like
> to see then.

In that case you will have to modify Heiko's patches, because (I believe) 
they currently choose the _latest_ commit containing the others...

Cheers,

...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-16  0:05             ` Johan Herland
@ 2010-06-16 17:16               ` Jens Lehmann
  2010-06-16 21:32                 ` Johan Herland
  0 siblings, 1 reply; 32+ messages in thread
From: Jens Lehmann @ 2010-06-16 17:16 UTC (permalink / raw)
  To: Johan Herland; +Cc: Heiko Voigt, git

Am 16.06.2010 02:05, schrieb Johan Herland:
> - If the purpose is to re-use existing submodule merges then I'm afraid (as 
> I've argued above) that this would happen too seldom to be useful in 
> practice (and even then you would already have had to set up the appropriate 
> config for your branch, to enable Git to find this pre-existing merge at 
> all).

That this is all but happening seldom for us is the reason for this WIP
patch from Heiko. And other use cases won't be harmed by this change, no?
And if some are, we can add a config option to .gitmodules to control that.

> Taking a step back and comparing the merging of submodules vs. the merging 
> of regular files:
> 
> Git's rules are simple and straightforward for regular files: If both 
> sides/branches have changed the same area of code (and the changes don't 
> exactly coincide), you get a conflict. There's no magic/cleverness applied 
> to try to figure out what a good resolution would look like; it's a 
> conflict, and the user must resolve it. Simple as that.
> 
> I'd argue that the submodule case should be the same: If both sides/branches 
> change the submodule (and the SHA1s don't exactly match), you get a 
> conflict, and it's up to the user to resolve it.
> 
> We may to make an exception for the case where one SHA1 is a descendant of 
> the other (i.e. a fast-forward situation), since that seems like a safe 
> choice in most situations, but I don't feel safe doing much beyond that.

Yes, I would like to see that fast-forward case silently handled by a merge
in the superproject.

And if it is no fast-forward but you find a unique merge where both of these
SHA1s are included, you could advertise it as a possible solution but not
automagically add it to the index. So you give the maintainer of the
superproject the opportunity to assess a possible solution but spare him the
chore of trying to find the reason why the merge failed and what he can do
about it by showing him the right direction. He might then decide to take a
later commit of the submodule or resolve the whole issue differently, but
that is up to him.

>> And for me the first commit containing the others is the one I would like
>> to see then.
> 
> In that case you will have to modify Heiko's patches, because (I believe) 
> they currently choose the _latest_ commit containing the others...

Yes, but IMHO that is a bit too much forwarding.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-16 17:16               ` Jens Lehmann
@ 2010-06-16 21:32                 ` Johan Herland
  2010-06-16 22:11                   ` Junio C Hamano
  0 siblings, 1 reply; 32+ messages in thread
From: Johan Herland @ 2010-06-16 21:32 UTC (permalink / raw)
  To: Jens Lehmann; +Cc: git, Heiko Voigt

On Wednesday 16 June 2010, Jens Lehmann wrote:
> Am 16.06.2010 02:05, schrieb Johan Herland:
> > - If the purpose is to re-use existing submodule merges then I'm afraid
> > (as I've argued above) that this would happen too seldom to be useful
> > in practice (and even then you would already have had to set up the
> > appropriate config for your branch, to enable Git to find this
> > pre-existing merge at all).
> 
> That this is all but happening seldom for us is the reason for this WIP
> patch from Heiko. And other use cases won't be harmed by this change, no?
> And if some are, we can add a config option to .gitmodules to control
> that.

Ok. I'm still not sure I see how this can happen frequently in practice, but 
since you both probably use submodules more heavily than I do, I will not 
stand in the way of progress.

> > Taking a step back and comparing the merging of submodules vs. the
> > merging of regular files:
> > 
> > Git's rules are simple and straightforward for regular files: If both
> > sides/branches have changed the same area of code (and the changes
> > don't exactly coincide), you get a conflict. There's no
> > magic/cleverness applied to try to figure out what a good resolution
> > would look like; it's a conflict, and the user must resolve it. Simple
> > as that.
> > 
> > I'd argue that the submodule case should be the same: If both
> > sides/branches change the submodule (and the SHA1s don't exactly
> > match), you get a conflict, and it's up to the user to resolve it.
> > 
> > We may to make an exception for the case where one SHA1 is a descendant
> > of the other (i.e. a fast-forward situation), since that seems like a
> > safe choice in most situations, but I don't feel safe doing much
> > beyond that.
> 
> Yes, I would like to see that fast-forward case silently handled by a
> merge in the superproject.
> 
> And if it is no fast-forward but you find a unique merge where both of
> these SHA1s are included, you could advertise it as a possible solution
> but not automagically add it to the index. So you give the maintainer of
> the superproject the opportunity to assess a possible solution but spare
> him the chore of trying to find the reason why the merge failed and what
> he can do about it by showing him the right direction. He might then
> decide to take a later commit of the submodule or resolve the whole
> issue differently, but that is up to him.

I still particularily don't like the added config variable for specifying 
which branch(es) are considered "stable". Would it be possible to instead 
search all submodule branches for the earliest commits that reconcile the 
two commits, and then inform the user that these may be interesting to look 
at when trying to find a resolution? Something like:

  Cannot auto-resolve conflict between $a_sha1 and $b_sha1 in submodule
  $foo. The following merge commits in submodule $foo may help you resolve
  this conflict:
    - $sha1 (present in branches $a, $b, $c)
    - $sha2 (present in branches $c, $d)
    - $sha3 (present in branches $e, $f)

Thus the user/maintainer gets the full picture, and are given as many 
alternatives as possible to help resolve the conflict, instead of 
automatically getting one (possibly wrong) resolution, just because the 
"stable" config was unset or incorrect.


...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-16 21:32                 ` Johan Herland
@ 2010-06-16 22:11                   ` Junio C Hamano
  2010-06-17  0:39                     ` Johan Herland
  0 siblings, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2010-06-16 22:11 UTC (permalink / raw)
  To: Johan Herland; +Cc: Jens Lehmann, git, Heiko Voigt

Johan Herland <johan@herland.net> writes:

> On Wednesday 16 June 2010, Jens Lehmann wrote:
>> Am 16.06.2010 02:05, schrieb Johan Herland:
>> > - If the purpose is to re-use existing submodule merges then I'm afraid
>> > (as I've argued above) that this would happen too seldom to be useful
>> > in practice (and even then you would already have had to set up the
>> > appropriate config for your branch, to enable Git to find this
>> > pre-existing merge at all).
>> 
>> That this is all but happening seldom for us is the reason for this WIP
>> patch from Heiko. And other use cases won't be harmed by this change, no?
>> And if some are, we can add a config option to .gitmodules to control
>> that.
>
> Ok. I'm still not sure I see how this can happen frequently in practice, but 
> since you both probably use submodules more heavily than I do, I will not 
> stand in the way of progress.

At least it would be useful to learn how they manage to often produce the
submodule merge G.  Your scenario description was very clearly written and
in that particular workflow I didn't think it would be plausible to have
such a merge before it is needed.  IOW, their workflow must be quite
different from your scenario description, and I would like to see a
plausible scenario description that is as clearly written as yours;
perhaps that workflow can even be advertised as one of the BCP.

One possibility that comes to mind is perhaps Alice notices the presence
of F after she recorded D, merges D and F in the submodule to produce G in
the submodule repository, but does _not_ update the superproject to point
at it yet, for some reason.  Perhaps she hasn't tested the superproject
with the merged submodule yet.  Whatever the reason is, the tip of her
branch in the submodule would be ahead of what her superproject commit D
points at, but the commit is available to the maintainer to fetch.

Then the maintainer would see G in the submodule (after fetching both
superproject and submodule from Alice) already prepared to be used in a
merge between D and F.

I dunno.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-16 22:11                   ` Junio C Hamano
@ 2010-06-17  0:39                     ` Johan Herland
  2010-06-17 21:13                       ` Jens Lehmann
  0 siblings, 1 reply; 32+ messages in thread
From: Johan Herland @ 2010-06-17  0:39 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jens Lehmann, git, Heiko Voigt

On Thursday 17 June 2010, Junio C Hamano wrote:
> Johan Herland <johan@herland.net> writes:
> > On Wednesday 16 June 2010, Jens Lehmann wrote:
> >> Am 16.06.2010 02:05, schrieb Johan Herland:
> >> > - If the purpose is to re-use existing submodule merges then I'm
> >> > afraid (as I've argued above) that this would happen too seldom to
> >> > be useful in practice (and even then you would already have had to
> >> > set up the appropriate config for your branch, to enable Git to
> >> > find this pre-existing merge at all).
> >> 
> >> That this is all but happening seldom for us is the reason for this
> >> WIP patch from Heiko. And other use cases won't be harmed by this
> >> change, no? And if some are, we can add a config option to
> >> .gitmodules to control that.
> > 
> > Ok. I'm still not sure I see how this can happen frequently in
> > practice, but since you both probably use submodules more heavily than
> > I do, I will not stand in the way of progress.
> 
> At least it would be useful to learn how they manage to often produce the
> submodule merge G.  Your scenario description was very clearly written
> and in that particular workflow I didn't think it would be plausible to
> have such a merge before it is needed.  IOW, their workflow must be
> quite different from your scenario description, and I would like to see
> a plausible scenario description that is as clearly written as yours;
> perhaps that workflow can even be advertised as one of the BCP.
> 
> One possibility that comes to mind is perhaps Alice notices the presence
> of F after she recorded D, merges D and F in the submodule to produce G
> in the submodule repository, but does _not_ update the superproject to
> point at it yet, for some reason.  Perhaps she hasn't tested the
> superproject with the merged submodule yet.  Whatever the reason is, the
> tip of her branch in the submodule would be ahead of what her
> superproject commit D points at, but the commit is available to the
> maintainer to fetch.

Dubious. If Alice's merge of D and F hasn't been properly tested yet, I 
don't see why it should exist on the submodule's master branch, and if it 
doesn't, it simply isn't considered, due to Heiko's "stable" branch magic.

> Then the maintainer would see G in the submodule (after fetching both
> superproject and submodule from Alice) already prepared to be used in a
> merge between D and F.
> 
> I dunno.

Me neither, but after some more thinking I have another alternative as to 
why the merge G might exist (on the submodule master branch) before the 
superproject merge is started:

If the submodule happens to be maintained as a truly separate project (with 
its own maintainer), then the maintainer of that submodule may have decided 
to merge Alice's feature_a branch on its own merits, without looking at the 
superproject at all. When the superproject maintainer later performs the 
superproject merge he can just pick up the submodule merge done by the 
submodule maintainer.

But this is pure speculation, and as you say, I'd like to see what workflows 
Jens and Heiko are actually using.


...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-17  0:39                     ` Johan Herland
@ 2010-06-17 21:13                       ` Jens Lehmann
  2010-06-18  9:40                         ` Johan Herland
  0 siblings, 1 reply; 32+ messages in thread
From: Jens Lehmann @ 2010-06-17 21:13 UTC (permalink / raw)
  To: Johan Herland; +Cc: Junio C Hamano, git, Heiko Voigt

Am 17.06.2010 02:39, schrieb Johan Herland:
> But this is pure speculation, and as you say, I'd like to see what workflows 
> Jens and Heiko are actually using.

Ok, here we go. And as I have difficulties thinking about that when looking
at a single graph, I'll draw two: The upper for the superproject and the
lower for the submodule.

Superproject:
  -----2         [Alice's branch]
 /      \
1--3-----4---5   [master]
    \       /
     ------6     [Bob's branch]

       ^   ^
       |   |     [commits of the submodule committed in the superproject]

Submodule:
  ---B           [feature_a]
 /    \
A--C---D---E     [master]
    \     /
     ----F       [feature_b]

Alice hacks away on her feature branch and notices she has to make changes
to a submodule. She creates the "feature_a" branch there with commit 'B'
and asks the maintainer of the submodule to review and merge her change.
Our policy is to never commit submodule commits that are not merged yet, as
they could just vanish (e.g. by rebasing; imagine having git as a submodule
and committing a SHA1 from the "pu" branch in the superproject ... a later
bisect might get really frustrating). So the submodule maintainer merges 'B'
into 'D' and tells Alice that. She commits 'D' for the submodule in her '2'
commit and asks the maintainer of the superproject to review and merge that.
The moment he merges that into '4', 'D' gets recorded in the master branch
of the superproject for the submodule.

Meanwhile Bob also needs a change in the submodule for his work in the
superproject and adds commit 'F' on the "feature_b" branch there. He waits
for the submodule maintainer to merge that into 'E' so he can do commit '6'.

But now the submodule commit 'D' in the superproject commit '4' has become
an obstacle for him and the superprojects maintainer. Bob can't rebase or
cherrypick beyond or up to '4' because he will get a merge conflict. If he
asks to merge his branch into '5', the superprojects maintainer will get a
merge conflict and tells to him to resolve that.

This situation would disappear when git merge would do fast-forwards for
submodule commits. And I argue that this is The Right Thing, because just as
commit '5' contains /all/ changes from both branches to the files it should
also contain /all/ changes to the submodules files that happened during
these branches. And that means merge should resolve the submodule to commit
'E'.

This is somehow similar to merging binary files. But for submodules Git has
a chance to tell the combined version of both changes in the fast-forward
case, whereas it can't know that for binary files. And yes, merge conflicts
could happen for the same reasons they may happen to files: The changes in
Bob's branch could break something in Alice's branch. But that applies for
files just like it does for submodule commits, no?

And the non-fast-forward case happens e.g. when Alice and Bob do not wait
for the submodule maintainer to merge their changes:

Superproject:
  ---2         [Alice's branch]
 /    \
1--3---4---5   [master]
    \     /
     ----6     [Bob's branch]

     ^   ^
     |   |       [commits of the submodule committed in the superproject]

Submodule:
  ---B           [feature_a]
 /    \
A--C---D---E     [master]
    \     /
     ----F       [feature_b]

In this case submodule commit 'B' is recorded in '2' and thus '4', while
commit 'F' will be recorded in '6'. So when '4' and '6' are merged, a valid
guess for '5' would be to use submodule commit 'E', as it is the first one
based on both 'B' and 'F'.

But in this case it is not so clear that 'E' is the right commit, as there
might be other commits present in the paths 'B'->'E' and 'F'->'E'. So 'E'
is just a probable solution for the merge, but not one I would like to see
automatically merged. But it should be proposed to the person doing the
merge as a probable resolution of the conflict, so that she can decide if
that is the case.

And no 'special' branch is used here. But I think this approach will solve
a lot of the problems we - and maybe others - have with submodule merges
without doing any harm to other workflows.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-17 21:13                       ` Jens Lehmann
@ 2010-06-18  9:40                         ` Johan Herland
  2010-06-18 13:55                           ` Jens Lehmann
                                             ` (2 more replies)
  0 siblings, 3 replies; 32+ messages in thread
From: Johan Herland @ 2010-06-18  9:40 UTC (permalink / raw)
  To: Jens Lehmann; +Cc: git, Junio C Hamano, Heiko Voigt

On Thursday 17 June 2010, Jens Lehmann wrote:
> Am 17.06.2010 02:39, schrieb Johan Herland:
> > But this is pure speculation, and as you say, I'd like to see what
> > workflows Jens and Heiko are actually using.
> 
> Ok, here we go. And as I have difficulties thinking about that when
> looking at a single graph, I'll draw two: The upper for the superproject
> and the lower for the submodule.
> 
> Superproject:
>   -----2         [Alice's branch]
>  /      \
> 1--3-----4---5   [master]
>     \       /
>      ------6     [Bob's branch]
> 
>        ^   ^
>        |   |     [commits of the submodule committed in the superproject]
> 
> Submodule:
>   ---B           [feature_a]
>  /    \
> A--C---D---E     [master]
>     \     /
>      ----F       [feature_b]
> 
> Alice hacks away on her feature branch and notices she has to make
> changes to a submodule. She creates the "feature_a" branch there with
> commit 'B' and asks the maintainer of the submodule to review and merge
> her change. Our policy is to never commit submodule commits that are not
> merged yet, as they could just vanish (e.g. by rebasing; imagine having
> git as a submodule and committing a SHA1 from the "pu" branch in the
> superproject ... a later bisect might get really frustrating). So the
> submodule maintainer merges 'B' into 'D' and tells Alice that. She
> commits 'D' for the submodule in her '2' commit and asks the maintainer
> of the superproject to review and merge that. The moment he merges that
> into '4', 'D' gets recorded in the master branch of the superproject for
> the submodule.
> 
> Meanwhile Bob also needs a change in the submodule for his work in the
> superproject and adds commit 'F' on the "feature_b" branch there. He
> waits for the submodule maintainer to merge that into 'E' so he can do
> commit '6'.
> 
> But now the submodule commit 'D' in the superproject commit '4' has
> become an obstacle for him and the superprojects maintainer. Bob can't
> rebase or cherrypick beyond or up to '4' because he will get a merge
> conflict. If he asks to merge his branch into '5', the superprojects
> maintainer will get a merge conflict and tells to him to resolve that.

Just verifying here: The superproject graph (with referenced submodule 
commits in parentheses) looks like this:

   --------2(D)            [Alice's branch]
  /         \
 1(A)--3(A)--4(D)---5(?)   [master]
        \          /
         ---------6(E)     [Bob's branch]

...and the conflict that causes problems when merging '4' and '6', is the 
'A'->'D' vs. 'A'->'E' submodule updates.

> This situation would disappear when git merge would do fast-forwards for
> submodule commits. And I argue that this is The Right Thing, because just
> as commit '5' contains /all/ changes from both branches to the files it
> should also contain /all/ changes to the submodules files that happened
> during these branches. And that means merge should resolve the submodule
> to commit 'E'.

I agree, and this is in line with my counter-proposal to Heiko: <quote> 
Given a submodule being part of a superproject conflict, if one of the 
candidate submodule SHA1s is a descendant of all the other submodule SHA1 
candidates, then choose that SHA1 as the proposed resolution</quote>.

> This is somehow similar to merging binary files. But for submodules Git
> has a chance to tell the combined version of both changes in the
> fast-forward case, whereas it can't know that for binary files. And yes,
> merge conflicts could happen for the same reasons they may happen to
> files: The changes in Bob's branch could break something in Alice's
> branch. But that applies for files just like it does for submodule
> commits, no?

Correct. I guess this means that - for the fast-forward case - Git can 
automatically record this resolution in the index, hence not requiring the 
user to "confirm" the resolution with 'git add'.

> And the non-fast-forward case happens e.g. when Alice and Bob do not wait
> for the submodule maintainer to merge their changes:
> 
> Superproject:
>   ---2         [Alice's branch]
>  /    \
> 1--3---4---5   [master]
>     \     /
>      ----6     [Bob's branch]
> 
>      ^   ^
>      |   |       [commits of the submodule committed in the superproject]
> 
> Submodule:
>   ---B           [feature_a]
>  /    \
> A--C---D---E     [master]
>     \     /
>      ----F       [feature_b]
> 
> In this case submodule commit 'B' is recorded in '2' and thus '4', while
> commit 'F' will be recorded in '6'. So when '4' and '6' are merged, a
> valid guess for '5' would be to use submodule commit 'E', as it is the
> first one based on both 'B' and 'F'.

Again, to verify: The superproject graph (with referenced submodule commits 
in parentheses) looks like this:

   --------2(B)            [Alice's branch]
  /         \
 1(A)--3(A)--4(B)---5(?)   [master]
        \          /
         ---------6(F)     [Bob's branch]

(Note that the situation would be different if '3' recorded 'C', as then '4' 
should record 'D' instead of 'B', IMHO.)

In this case, the conflict that causes problems when merging '4' and '6', is 
the 'A'->'B' vs. 'A'->'F' submodule updates.

And, indeed, in the scenario you present 'E' is probably the best guess '5'.

(Here, you still assume that although the submodule merges 'D'/'E' may not 
be done by the time Alice/Bob records '2'/'6', they are definitely done by 
the time the superproject maintainer gets around to creating '5'. Although 
this apparently works well for your case, I'm sure there are other scenarios 
where this is not the case, and are neither helped, nor hurt, by this 
effort.)

> But in this case it is not so clear that 'E' is the right commit, as
> there might be other commits present in the paths 'B'->'E' and 'F'->'E'.
> So 'E' is just a probable solution for the merge, but not one I would
> like to see automatically merged. But it should be proposed to the
> person doing the merge as a probable resolution of the conflict, so that
> she can decide if that is the case.

Agreed. Automatic resolving in this case is evil.

> And no 'special' branch is used here.

Well, you need to traverse _some_ submodule ref(s) in order to find 'E' at 
all. My argument is that there may also be _other_ submodule refs that 
contain merges of 'B' and 'F' as well, and they should _also_ be considered 
as valid candidates for the resolution in '5'. I would in fact argue that 
you should traverse _all_ submodule refs (maybe even including remote-
tracking refs) to look for merges of 'B' and 'F' [1], and present them all 
as equal alternatives.

Consider for example this submodule scenario:

        -----------G      [maint]
       /          /
   ---B--------  /        [feature_a]
  /    \       \/
 A--C---D---E  /\         [master]
     \     /  /  \
      ----F---    \       [feature_b]
              \    \
               --H--I--J  [next]

If there exist multiple merges that resolve 'B' and 'F' (in this case: 'G', 
'E' and 'I'), then all of those should be presented as equal alternatives to 
the user.

> But I think this approach will solve a lot of the problems we - and maybe
> others - have with submodule merges without doing any harm to other
> workflows.

For the fast-forward case, I fully agree.

For the non-fast-forward case, I would suggest to search for submodule 
merges that contain both submodule commits (as described in [1]), and then:

- If there are no merges, do nothing (leave a conflict).

- If there is exactly one merge, then check it out (but do not record it as 
resolved in the index).

- If there are more merge alternatives, present them as equal alternatives, 
but do nothing (leave a conflict).

Have fun! :)

...Johan

[1]: To put the search in general terms: Find all merge commits that has 
_both_ (or in the case of octopus; _all_) of the candidate commits (but none 
of the other merges) somewhere in its ancestry. You could implement this by 
first intersecting the sets returned from these commands (run in the 
submodule):

  git rev-list --merges --ancestry-path --all ^B
  git rev-list --merges --ancestry-path --all ^F

to get the set of merges descending from both 'B' and 'F', and then prune 
each member in the remaining set that has another set member in its 
ancestry.

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-18  9:40                         ` Johan Herland
@ 2010-06-18 13:55                           ` Jens Lehmann
  2010-06-19  9:43                             ` Heiko Voigt
  2010-06-19 10:17                           ` Heiko Voigt
  2010-06-20 18:04                           ` [WIP PATCH 0/3] implement merge strategy for submodule links Junio C Hamano
  2 siblings, 1 reply; 32+ messages in thread
From: Jens Lehmann @ 2010-06-18 13:55 UTC (permalink / raw)
  To: Johan Herland, Heiko Voigt; +Cc: git, Junio C Hamano

Am 18.06.2010 11:40, schrieb Johan Herland:
> On Thursday 17 June 2010, Jens Lehmann wrote:
>> Am 17.06.2010 02:39, schrieb Johan Herland:
>>> But this is pure speculation, and as you say, I'd like to see what
>>> workflows Jens and Heiko are actually using.
>>
>> Ok, here we go. And as I have difficulties thinking about that when
>> looking at a single graph, I'll draw two: The upper for the superproject
>> and the lower for the submodule.
>>
>> Superproject:
>>   -----2         [Alice's branch]
>>  /      \
>> 1--3-----4---5   [master]
>>     \       /
>>      ------6     [Bob's branch]
>>
>>        ^   ^
>>        |   |     [commits of the submodule committed in the superproject]
>>
>> Submodule:
>>   ---B           [feature_a]
>>  /    \
>> A--C---D---E     [master]
>>     \     /
>>      ----F       [feature_b]
>>
>> Alice hacks away on her feature branch and notices she has to make
>> changes to a submodule. She creates the "feature_a" branch there with
>> commit 'B' and asks the maintainer of the submodule to review and merge
>> her change. Our policy is to never commit submodule commits that are not
>> merged yet, as they could just vanish (e.g. by rebasing; imagine having
>> git as a submodule and committing a SHA1 from the "pu" branch in the
>> superproject ... a later bisect might get really frustrating). So the
>> submodule maintainer merges 'B' into 'D' and tells Alice that. She
>> commits 'D' for the submodule in her '2' commit and asks the maintainer
>> of the superproject to review and merge that. The moment he merges that
>> into '4', 'D' gets recorded in the master branch of the superproject for
>> the submodule.
>>
>> Meanwhile Bob also needs a change in the submodule for his work in the
>> superproject and adds commit 'F' on the "feature_b" branch there. He
>> waits for the submodule maintainer to merge that into 'E' so he can do
>> commit '6'.
>>
>> But now the submodule commit 'D' in the superproject commit '4' has
>> become an obstacle for him and the superprojects maintainer. Bob can't
>> rebase or cherrypick beyond or up to '4' because he will get a merge
>> conflict. If he asks to merge his branch into '5', the superprojects
>> maintainer will get a merge conflict and tells to him to resolve that.
> 
> Just verifying here: The superproject graph (with referenced submodule 
> commits in parentheses) looks like this:
> 
>    --------2(D)            [Alice's branch]
>   /         \
>  1(A)--3(A)--4(D)---5(?)   [master]
>         \          /
>          ---------6(E)     [Bob's branch]
> 
> ...and the conflict that causes problems when merging '4' and '6', is the 
> 'A'->'D' vs. 'A'->'E' submodule updates.

That's correct.


>> This is somehow similar to merging binary files. But for submodules Git
>> has a chance to tell the combined version of both changes in the
>> fast-forward case, whereas it can't know that for binary files. And yes,
>> merge conflicts could happen for the same reasons they may happen to
>> files: The changes in Bob's branch could break something in Alice's
>> branch. But that applies for files just like it does for submodule
>> commits, no?
> 
> Correct. I guess this means that - for the fast-forward case - Git can 
> automatically record this resolution in the index, hence not requiring the 
> user to "confirm" the resolution with 'git add'.

Yup, I think we agree here and I just wanted to explain our regular
workflow and show that such a strategy would help us very much.


>> And the non-fast-forward case happens e.g. when Alice and Bob do not wait
>> for the submodule maintainer to merge their changes:
>>
>> Superproject:
>>   ---2         [Alice's branch]
>>  /    \
>> 1--3---4---5   [master]
>>     \     /
>>      ----6     [Bob's branch]
>>
>>      ^   ^
>>      |   |       [commits of the submodule committed in the superproject]
>>
>> Submodule:
>>   ---B           [feature_a]
>>  /    \
>> A--C---D---E     [master]
>>     \     /
>>      ----F       [feature_b]
>>
>> In this case submodule commit 'B' is recorded in '2' and thus '4', while
>> commit 'F' will be recorded in '6'. So when '4' and '6' are merged, a
>> valid guess for '5' would be to use submodule commit 'E', as it is the
>> first one based on both 'B' and 'F'.
> 
> Again, to verify: The superproject graph (with referenced submodule commits 
> in parentheses) looks like this:
> 
>    --------2(B)            [Alice's branch]
>   /         \
>  1(A)--3(A)--4(B)---5(?)   [master]
>         \          /
>          ---------6(F)     [Bob's branch]

Correct.


>> But I think this approach will solve a lot of the problems we - and maybe
>> others - have with submodule merges without doing any harm to other
>> workflows.
> 
> For the fast-forward case, I fully agree.
> 
> For the non-fast-forward case, I would suggest to search for submodule 
> merges that contain both submodule commits (as described in [1]), and then:
> 
> - If there are no merges, do nothing (leave a conflict).
> 
> - If there is exactly one merge, then check it out (but do not record it as 
> resolved in the index).
> 
> - If there are more merge alternatives, present them as equal alternatives, 
> but do nothing (leave a conflict).

Nice summary. Heiko, would you please post a new patch implementing this
approach?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-18 13:55                           ` Jens Lehmann
@ 2010-06-19  9:43                             ` Heiko Voigt
  2010-06-19 15:54                               ` Jens Lehmann
  0 siblings, 1 reply; 32+ messages in thread
From: Heiko Voigt @ 2010-06-19  9:43 UTC (permalink / raw)
  To: Jens Lehmann; +Cc: Johan Herland, git, Junio C Hamano

On Fri, Jun 18, 2010 at 03:55:10PM +0200, Jens Lehmann wrote:
> Am 18.06.2010 11:40, schrieb Johan Herland:
> > On Thursday 17 June 2010, Jens Lehmann wrote:
> >> But I think this approach will solve a lot of the problems we - and maybe
> >> others - have with submodule merges without doing any harm to other
> >> workflows.
> > 
> > For the fast-forward case, I fully agree.
> > 
> > For the non-fast-forward case, I would suggest to search for submodule 
> > merges that contain both submodule commits (as described in [1]), and then:
> > 
> > - If there are no merges, do nothing (leave a conflict).
> > 
> > - If there is exactly one merge, then check it out (but do not record it as 
> > resolved in the index).
> > 
> > - If there are more merge alternatives, present them as equal alternatives, 
> > but do nothing (leave a conflict).
> 
> Nice summary. Heiko, would you please post a new patch implementing this
> approach?

Yes sure. I agree with the proposed scheme.

As Jens is working on the automatically checkout submodules extension I
will base the merge patch on your branch. Is the checkout_submodule()
function already stable enough to be used?

cheers Heiko

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-19  9:43                             ` Heiko Voigt
@ 2010-06-19 15:54                               ` Jens Lehmann
  0 siblings, 0 replies; 32+ messages in thread
From: Jens Lehmann @ 2010-06-19 15:54 UTC (permalink / raw)
  To: Heiko Voigt; +Cc: Johan Herland, git, Junio C Hamano

Am 19.06.2010 11:43, schrieb Heiko Voigt:
> On Fri, Jun 18, 2010 at 03:55:10PM +0200, Jens Lehmann wrote:
>> Nice summary. Heiko, would you please post a new patch implementing this
>> approach?
> 
> Yes sure. I agree with the proposed scheme.

Thank you very much!


> As Jens is working on the automatically checkout submodules extension I
> will base the merge patch on your branch. Is the checkout_submodule()
> function already stable enough to be used?

Unfortunately not (the checkout itself is working fine, but the test if
the checkout would overwrite local changes in the submodules is still
missing, so be warned!).

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-18  9:40                         ` Johan Herland
  2010-06-18 13:55                           ` Jens Lehmann
@ 2010-06-19 10:17                           ` Heiko Voigt
  2010-06-19 13:15                             ` Johan Herland
  2010-06-20 18:04                           ` [WIP PATCH 0/3] implement merge strategy for submodule links Junio C Hamano
  2 siblings, 1 reply; 32+ messages in thread
From: Heiko Voigt @ 2010-06-19 10:17 UTC (permalink / raw)
  To: Johan Herland; +Cc: Jens Lehmann, git, Junio C Hamano

Hi,

On Fri, Jun 18, 2010 at 11:40:16AM +0200, Johan Herland wrote:
> [1]: To put the search in general terms: Find all merge commits that has 
> _both_ (or in the case of octopus; _all_) of the candidate commits (but none 
> of the other merges) somewhere in its ancestry. You could implement this by 
> first intersecting the sets returned from these commands (run in the 
> submodule):
> 
>   git rev-list --merges --ancestry-path --all ^B
>   git rev-list --merges --ancestry-path --all ^F
> 
> to get the set of merges descending from both 'B' and 'F', and then prune 
> each member in the remaining set that has another set member in its 
> ancestry.

Is the --ancestry-path option already implemented? Because on my git
1.7.1 it does not seem to. What does it do?

cheers Heiko

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-19 10:17                           ` Heiko Voigt
@ 2010-06-19 13:15                             ` Johan Herland
  2010-06-19 15:52                               ` [WIP PATCH 3/3] implement automatic fast forward merge for submodules Heiko Voigt
  0 siblings, 1 reply; 32+ messages in thread
From: Johan Herland @ 2010-06-19 13:15 UTC (permalink / raw)
  To: Heiko Voigt; +Cc: git, Jens Lehmann, Junio C Hamano

On Saturday 19 June 2010, Heiko Voigt wrote:
> Hi,
> 
> On Fri, Jun 18, 2010 at 11:40:16AM +0200, Johan Herland wrote:
> > [1]: To put the search in general terms: Find all merge commits that
> > has _both_ (or in the case of octopus; _all_) of the candidate commits
> > (but none of the other merges) somewhere in its ancestry. You could
> > implement this by first intersecting the sets returned from these
> > commands (run in the
> > 
> > submodule):
> >   git rev-list --merges --ancestry-path --all ^B
> >   git rev-list --merges --ancestry-path --all ^F
> > 
> > to get the set of merges descending from both 'B' and 'F', and then
> > prune each member in the remaining set that has another set member in
> > its ancestry.
> 
> Is the --ancestry-path option already implemented? Because on my git
> 1.7.1 it does not seem to.

It was recently merged to 'next'.

> What does it do?

When given a commit range ("$from..$to", or "$to ^$from"), it shows commit 
that are in $to but not in $from (i.e. the usual), but additionally limits 
the list to those commits that descend from $from. Another use case for this 
functionality is, given a bug introduced in commit $foo, you can list the 
commit in the master branch that are potentially "contaminated" with the 
bug, with the following command:

  git log --ancestry-path $foo..master

See the --ancestry-path documentation in the jc/rev-list-ancestry-path 
series for more info.


...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [WIP PATCH 3/3] implement automatic fast forward merge for submodules
  2010-06-19 13:15                             ` Johan Herland
@ 2010-06-19 15:52                               ` Heiko Voigt
  0 siblings, 0 replies; 32+ messages in thread
From: Heiko Voigt @ 2010-06-19 15:52 UTC (permalink / raw)
  To: Johan Herland; +Cc: git, Jens Lehmann, Junio C Hamano

This implements a simple merge strategy for submodule hashes. We check
whether one side of the merge candidates is already contained in the
other and then merge automatically.

If both sides contain changes we search for a merge in the submodule.
In case a single one exists we check that out and suggest it as the
merge resolution. A list of candidates is returned when we find multiple
merges that contain both sides of the changes.

This is useful for a workflow in which the developers can publish topic
branches in submodules and a seperate maintainer merges them. In case
the developers always wait until their branch gets merged before tracking
them in the superproject all merges of branches that contain submodule
changes will be resolved automatically. If developers choose to track
their feature branch the maintainer might get a conflict but git will
search the submodule for a merge and suggest it/them as a resolution.

Signed-off-by: Heiko Voigt <hvoigt@hvoigt.net>
---

This patch replaces the last one of the previously discussed series. It is
still work in progress but already implements the scheme discussed. I have not
had the time to adjust the tests so they fail currently. The extension to
setup_revisions() is still very hackyish which I will also cleanup in a later
iteration.

The whole series can also be found on:

http://github.com/hvoigt/git/tree/submodule_merge_v2

 merge-recursive.c          |    9 ++-
 refs.c                     |    6 ++
 refs.h                     |    1 +
 revision.c                 |   29 ++++++---
 revision.h                 |    1 +
 submodule.c                |  155 ++++++++++++++++++++++++++++++++++++++++++++
 submodule.h                |    2 +
 t/t7405-submodule-merge.sh |  115 +++++++++++++++++++++++++++++++--
 8 files changed, 300 insertions(+), 18 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index 206c103..a032a8b 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -20,6 +20,7 @@
 #include "attr.h"
 #include "merge-recursive.h"
 #include "dir.h"
+#include "submodule.h"
 
 static struct tree *shift_tree_object(struct tree *one, struct tree *two,
 				      const char *subtree_shift)
@@ -525,13 +526,15 @@ static void update_file_flags(struct merge_options *o,
 		void *buf;
 		unsigned long size;
 
-		if (S_ISGITLINK(mode))
+		if (S_ISGITLINK(mode)) {
 			/*
 			 * We may later decide to recursively descend into
 			 * the submodule directory and update its index
 			 * and/or work tree, but we do not do that now.
 			 */
+			update_wd = 0;
 			goto update_index;
+		}
 
 		buf = read_sha1_file(sha, &type, &size);
 		if (!buf)
@@ -716,8 +719,8 @@ static struct merge_file_info merge_file(struct merge_options *o,
 			free(result_buf.ptr);
 			result.clean = (merge_status == 0);
 		} else if (S_ISGITLINK(a->mode)) {
-			result.clean = 0;
-			hashcpy(result.sha, a->sha1);
+			result.clean = merge_submodule(result.sha, one->path, one->sha1,
+						       a->sha1, b->sha1);
 		} else if (S_ISLNK(a->mode)) {
 			hashcpy(result.sha, a->sha1);
 
diff --git a/refs.c b/refs.c
index f2de9f5..3882131 100644
--- a/refs.c
+++ b/refs.c
@@ -703,6 +703,12 @@ int for_each_ref(each_ref_fn fn, void *cb_data)
 	return do_for_each_ref(NULL, "refs/", fn, 0, 0, cb_data);
 }
 
+int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data)
+{
+	return do_for_each_ref(submodule, "refs/", fn, 0,
+			DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
+}
+
 int for_each_ref_in(const char *prefix, each_ref_fn fn, void *cb_data)
 {
 	return do_for_each_ref(NULL, prefix, fn, strlen(prefix), 0, cb_data);
diff --git a/refs.h b/refs.h
index 384e311..0889d8b 100644
--- a/refs.h
+++ b/refs.h
@@ -19,6 +19,7 @@ struct ref_lock {
  */
 typedef int each_ref_fn(const char *refname, const unsigned char *sha1, int flags, void *cb_data);
 extern int head_ref(each_ref_fn, void *);
+extern int for_each_ref_submodule(const char *submodule, each_ref_fn fn, void *cb_data);
 extern int for_each_ref(each_ref_fn, void *);
 extern int for_each_ref_in(const char *, each_ref_fn, void *);
 extern int for_each_tag_ref(each_ref_fn, void *);
diff --git a/revision.c b/revision.c
index b209d49..8955d7c 100644
--- a/revision.c
+++ b/revision.c
@@ -721,12 +721,15 @@ static void init_all_refs_cb(struct all_refs_cb *cb, struct rev_info *revs,
 	cb->all_flags = flags;
 }
 
-static void handle_refs(struct rev_info *revs, unsigned flags,
+static void handle_refs(const char *submodule, struct rev_info *revs, unsigned flags,
 		int (*for_each)(each_ref_fn, void *))
 {
 	struct all_refs_cb cb;
 	init_all_refs_cb(&cb, revs, flags);
-	for_each(handle_one_ref, &cb);
+	if (!submodule)
+		for_each(handle_one_ref, &cb);
+	else
+		for_each_ref_submodule(submodule, handle_one_ref, &cb);
 }
 
 static void handle_one_reflog_commit(unsigned char *sha1, void *cb_data)
@@ -1357,6 +1360,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 {
 	int i, flags, left, seen_dashdash, read_from_stdin, got_rev_arg = 0;
 	const char **prune_data = NULL;
+	const char *submodule = NULL;
+
+	if (opt)
+		submodule = opt->submodule;
 
 	/* First, search for "--" */
 	seen_dashdash = 0;
@@ -1381,26 +1388,30 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
 			int opts;
 
 			if (!strcmp(arg, "--all")) {
-				handle_refs(revs, flags, for_each_ref);
-				handle_refs(revs, flags, head_ref);
+				if (submodule) {
+					handle_refs(submodule, revs, flags, NULL);
+				} else {
+					handle_refs(NULL, revs, flags, for_each_ref);
+					handle_refs(NULL, revs, flags, head_ref);
+				}
 				continue;
 			}
 			if (!strcmp(arg, "--branches")) {
-				handle_refs(revs, flags, for_each_branch_ref);
+				handle_refs(NULL, revs, flags, for_each_branch_ref);
 				continue;
 			}
 			if (!strcmp(arg, "--bisect")) {
-				handle_refs(revs, flags, for_each_bad_bisect_ref);
-				handle_refs(revs, flags ^ UNINTERESTING, for_each_good_bisect_ref);
+				handle_refs(NULL, revs, flags, for_each_bad_bisect_ref);
+				handle_refs(NULL, revs, flags ^ UNINTERESTING, for_each_good_bisect_ref);
 				revs->bisect = 1;
 				continue;
 			}
 			if (!strcmp(arg, "--tags")) {
-				handle_refs(revs, flags, for_each_tag_ref);
+				handle_refs(NULL, revs, flags, for_each_tag_ref);
 				continue;
 			}
 			if (!strcmp(arg, "--remotes")) {
-				handle_refs(revs, flags, for_each_remote_ref);
+				handle_refs(NULL, revs, flags, for_each_remote_ref);
 				continue;
 			}
 			if (!prefixcmp(arg, "--glob=")) {
diff --git a/revision.h b/revision.h
index 568f1c9..0fe4322 100644
--- a/revision.h
+++ b/revision.h
@@ -145,6 +145,7 @@ extern volatile show_early_output_fn_t show_early_output;
 struct setup_revision_opt {
 	const char *def;
 	void (*tweak)(struct rev_info *, struct setup_revision_opt *);
+	const char *submodule;
 };
 
 extern void init_revisions(struct rev_info *revs, const char *prefix);
diff --git a/submodule.c b/submodule.c
index abd5fd5..ac76791 100644
--- a/submodule.c
+++ b/submodule.c
@@ -6,6 +6,7 @@
 #include "revision.h"
 #include "run-command.h"
 #include "diffcore.h"
+#include "refs.h"
 
 static int add_submodule_odb(const char *path)
 {
@@ -242,3 +243,157 @@ int checkout_submodule(const char *path, const unsigned char sha1[20], int force
 
 	return 0;
 }
+
+static int find_first_merges(struct object_array *result, const char *path,
+		struct commit *a, struct commit *b)
+{
+	int i, j;
+	struct object_array merges;
+	struct commit *commit;
+	int contains_another;
+
+	char merged_revision[42];
+	const char *rev_args[] = { "rev-list", "--merges", "--all", merged_revision, NULL };
+	struct rev_info revs;
+	struct setup_revision_opt rev_opts;
+
+	memset(&merges, 0, sizeof(merges));
+	memset(result, 0, sizeof(struct object_array));
+	memset(&rev_opts, 0, sizeof(rev_opts));
+
+	/* get all revisions that merge commit a */
+	snprintf(merged_revision, sizeof(merged_revision), "^%s",
+			find_unique_abbrev(a->object.sha1, 40));
+	init_revisions(&revs, NULL);
+	rev_opts.submodule = path;
+	setup_revisions(sizeof(rev_args)/sizeof(char *)-1, rev_args, &revs, &rev_opts);
+
+	/* save all revisions from the above list that contain b */
+	if (prepare_revision_walk(&revs))
+		die("revision walk setup failed");
+	while ((commit = get_revision(&revs)) != NULL) {
+		struct object *o = &(commit->object);
+		if (in_merge_bases(b, (struct commit **) &o, 1)) {
+			add_object_array(o, NULL, &merges);
+		}
+	}
+
+	/* Now we've got all merges that contain a and b. Prune all
+	 * merges that contain another found merge and save them in
+	 * result. */
+	for (i = 0; i < merges.nr; i++) {
+		struct commit *m1 = (struct commit *) merges.objects[i].item;
+
+		contains_another = 0;
+		for (j = 0; j < merges.nr; j++) {
+			struct commit *m2 = (struct commit *) merges.objects[j].item;
+			if (i != j && in_merge_bases(m2, &m1, 1)) {
+				contains_another = 1;
+				break;
+			}
+		}
+
+		if (!contains_another)
+			add_object_array(merges.objects[i].item,
+					 merges.objects[i].name, result);
+	}
+
+	free(merges.objects);
+	return result->nr;
+}
+
+static void print_commit(struct commit *commit)
+{
+	static const char *format = " %h: %m %s";
+	struct strbuf sb = STRBUF_INIT;
+	struct pretty_print_context ctx = {0};
+	ctx.date_mode = DATE_NORMAL;
+	format_commit_message(commit, format, &sb, &ctx);
+	strbuf_addstr(&sb, "\n");
+	fprintf(stderr, "%s", sb.buf);
+}
+
+int merge_submodule(unsigned char result[20], const char *path, const unsigned char base[20],
+		    const unsigned char a[20], const unsigned char b[20])
+{
+	struct commit *commit_base, *commit_a, *commit_b;
+	int parent_count;
+	struct object_array merges;
+
+	int i;
+
+	/* store a in result in case we fail */
+	hashcpy(result, a);
+
+	/* we can not handle deletion conflicts */
+	if (is_null_sha1(base))
+		return 0;
+	if (is_null_sha1(a))
+		return 0;
+	if (is_null_sha1(b))
+		return 0;
+
+	if (add_submodule_odb(path)) {
+		warning("Failed to merge submodule %s (not checked out)", path);
+		return 0;
+	}
+
+	if (!(commit_base = lookup_commit_reference(base)) ||
+	    !(commit_a = lookup_commit_reference(a)) ||
+	    !(commit_b = lookup_commit_reference(b)))
+	{
+		warning("Failed to merge submodule %s (commits not present)", path);
+		return 0;
+	}
+
+	/* 1. case a is contained in b or vice versa */
+	if (in_merge_bases(commit_a, &commit_b, 1)) {
+		hashcpy(result, b);
+		return 1;
+	}
+	if (in_merge_bases(commit_b, &commit_a, 1)) {
+		hashcpy(result, a);
+		return 1;
+	}
+
+	/* 2. case there are one ore more merges that contain a and b the
+	 * submodule. If there is a single one check it out but leave it
+	 * marked unmerged so the user needs to confirm the resolution */
+
+	/* are both changes forward */
+	if (!in_merge_bases(commit_base, &commit_a, 1) ||
+	    !in_merge_bases(commit_base, &commit_b, 1))
+	{
+		warning("Submodule rewound can not merge");
+		return 0;
+	}
+
+	/* find commit which merges them */
+	parent_count = find_first_merges(&merges, path, commit_a, commit_b);
+	if (!parent_count) {
+		warning("Failed to merge submodule %s (merge not found)", path);
+		goto finish;
+	}
+
+	if (parent_count != 1) {
+		warning("Failed to merge submodule %s (multiple merges found):", path);
+		for (i = 0; i < merges.nr; i++) {
+			print_commit((struct commit *) merges.objects[i].item);
+		}
+		goto finish;
+	}
+
+	warning("Failed to merge submodule %s (not fast-forward):\n", path);
+	fprintf(stderr, "Found a possible merge resolution for the submodule:\n");
+	print_commit((struct commit *) merges.objects[0].item);
+	fprintf(stderr, "If this is correct simply add it to the index for example\n"
+			"by using:\n\n"
+			"   git add %s\n\n"
+			"which will accept this suggestion.\n", path);
+
+	checkout_submodule(path, merges.objects[0].item->sha1, 0);
+
+finish:
+	free(merges.objects);
+	return 0;
+}
diff --git a/submodule.h b/submodule.h
index fc6909e..12d6d73 100644
--- a/submodule.h
+++ b/submodule.h
@@ -7,5 +7,7 @@ void show_submodule_summary(FILE *f, const char *path,
 		const char *del, const char *add, const char *reset);
 unsigned is_submodule_modified(const char *path, int ignore_untracked);
 int checkout_submodule(const char *path, const unsigned char sha1[20], int force);
+int merge_submodule(unsigned char result[20], const char *path, const unsigned char base[20],
+		    const unsigned char a[20], const unsigned char b[20]);
 
 #endif
diff --git a/t/t7405-submodule-merge.sh b/t/t7405-submodule-merge.sh
index 4a7b893..04dc371 100755
--- a/t/t7405-submodule-merge.sh
+++ b/t/t7405-submodule-merge.sh
@@ -54,13 +54,116 @@ test_expect_success setup '
 	git merge -s ours a
 '
 
-test_expect_success 'merging with modify/modify conflict' '
+# History setup
+#
+#      b
+#    /   \
+#   a     d
+#    \   /
+#      c
+#
+# a in the main repository records to sub-a in the submodule and
+# analogous b and c. d should be automatically found by merging c into
+# b in the main repository.
+test_expect_success 'setup for merge search' '
+	mkdir merge-search &&
+	cd merge-search &&
+	git init &&
+	mkdir sub &&
+	(cd sub &&
+	 git init &&
+	 echo "file-a" > file-a &&
+	 git add file-a &&
+	 git commit -m "sub-a" &&
+	 git checkout -b sub-a) &&
+	git add sub &&
+	git commit -m "a" &&
+	git checkout -b a &&
+
+	git checkout -b b &&
+	(cd sub &&
+	 git checkout -b sub-b &&
+	 echo "file-b" > file-b &&
+	 git add file-b &&
+	 git commit -m "sub-b") &&
+	git commit -a -m "b" &&
+
+	git checkout -b c a &&
+	(cd sub &&
+	 git checkout -b sub-c sub-a &&
+	 echo "file-c" > file-c &&
+	 git add file-c &&
+	 git commit -m "sub-c") &&
+	git commit -a -m "c" &&
+
+	(cd sub &&
+	 git checkout -b sub-d sub-b &&
+	 git merge sub-c &&
+	 git checkout sub-b) &&
+	git checkout -b test b &&
+	cd ..
+'
+
+test_expect_success 'merging with common parent search' '
+	cd merge-search &&
+	git checkout -b test-parent b &&
+	git merge c &&
+	git ls-tree test-parent | grep sub | cut -f1 | cut -f3 -d" " > actual &&
+	(cd sub &&
+	 git rev-parse sub-d > ../expect) &&
+	test_cmp actual expect &&
+	cd ..
+'
+
+test_expect_success 'merging should fail for ambigous common parent' '
+	cd merge-search &&
+	git checkout -b test-ambigous b &&
+	(cd sub &&
+	 git checkout -b ambigous sub-d &&
+	 echo "ambigous-file" > ambigous-file &&
+	 git add ambigous-file &&
+	 git commit -m "ambigous") &&
+	test_must_fail git merge c &&
+	git reset --hard &&
+	cd ..
+'
+
+# in a situation like this
+#
+# submodule tree:
+#
+#    sub-a --- sub-b --- sub-d
+#
+# main tree:
+#
+#    e (sub-a)
+#   /
+#  d (sub-b)
+#   \
+#    f (sub-d)
+#
+# A merge should fail because one change points backwards.
+
+test_expect_success 'merging should fail for changes that are backwards' '
+	cd merge-search &&
+	git checkout -b d a &&
+	(cd sub &&
+	 git checkout sub-b) &&
+	git commit -a -m "d" &&
+
+	git checkout -b e d &&
+	(cd sub &&
+	 git checkout sub-a) &&
+	git commit -a -m "e" &&
+
+	git checkout -b f d &&
+	(cd sub &&
+	 git checkout sub-d) &&
+	git commit -a -m "f" &&
 
-	git checkout -b test1 a &&
-	test_must_fail git merge b &&
-	test -f .git/MERGE_MSG &&
-	git diff &&
-	test -n "$(git ls-files -u)"
+	git checkout -b test-backward e &&
+	test_must_fail git merge f &&
+	cd ..
 '
 
 test_expect_success 'merging with a modify/modify conflict between merge bases' '
-- 
1.7.1.465.g3692.dirty

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-18  9:40                         ` Johan Herland
  2010-06-18 13:55                           ` Jens Lehmann
  2010-06-19 10:17                           ` Heiko Voigt
@ 2010-06-20 18:04                           ` Junio C Hamano
  2010-06-20 23:06                             ` Johan Herland
  2 siblings, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2010-06-20 18:04 UTC (permalink / raw)
  To: Johan Herland; +Cc: Jens Lehmann, git, Heiko Voigt

Johan Herland <johan@herland.net> writes:

> On Thursday 17 June 2010, Jens Lehmann wrote:
> ...
>> And no 'special' branch is used here.
>
> Well, you need to traverse _some_ submodule ref(s) in order to find 'E' at 
> all. My argument is that there may also be _other_ submodule refs that 
> contain merges of 'B' and 'F' as well, and they should _also_ be considered 
> as valid candidates for the resolution in '5'. I would in fact argue that 
> you should traverse _all_ submodule refs (maybe even including remote-
> tracking refs) to look for merges of 'B' and 'F' [1], and present them all 
> as equal alternatives.
>
> Consider for example this submodule scenario:
>
>         -----------G      [maint]
>        /          /
>    ---B--------  /        [feature_a]
>   /    \       \/
>  A--C---D---E  /\         [master]
>      \     /  /  \
>       ----F---    \       [feature_b]
>               \    \
>                --H--I--J  [next]
>
> If there exist multiple merges that resolve 'B' and 'F' (in this case: 'G', 
> 'E' and 'I'), then all of those should be presented as equal alternatives to 
> the user.

You lost me completely here.

I thought you were going to argue that it would be an utterly wrong thing
to suggest E or I as a probably resolution if the superproject merge that
needs to merge superproject commits that binds B and F as its submodules
is being done in the context of advance 'maint' track of the superproject.

Think of 'D' as a commit that corresponds to a major version bump point of
the superproject; i.e. it introduces a major change to the submodule.  In
the 'maintenance track' of the superproject for maintaining the previous
version, you don't want to have any commit that has 'D' as an ancestor.

For an "automated" heuristics based on "find common descendants" to make
sense, the branches you are merging have to share the common purpose, and
you need to limit the common descendants you find to the ones that are
compatible with the shared purpose.  The purpose of 'maintenance track'
may be to maintain the previous version without dragging newer and more
exciting things that happened in the later development.  In the above
picture, G (that has nothing but B and F) is the only commit that can be
safely assumed that two commits in the superproject space that bind B and
F respectively can use as the submodule as their merge result.  E and I
are contaminated with D and H whose purpose in the superproject space is
unknown without further hint.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-20 18:04                           ` [WIP PATCH 0/3] implement merge strategy for submodule links Junio C Hamano
@ 2010-06-20 23:06                             ` Johan Herland
  2010-06-21  0:03                               ` Junio C Hamano
  0 siblings, 1 reply; 32+ messages in thread
From: Johan Herland @ 2010-06-20 23:06 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jens Lehmann, Heiko Voigt

On Sunday 20 June 2010, Junio C Hamano wrote:
> Johan Herland <johan@herland.net> writes:
> > On Thursday 17 June 2010, Jens Lehmann wrote:
> > ...
> > 
> >> And no 'special' branch is used here.
> > 
> > Well, you need to traverse _some_ submodule ref(s) in order to find 'E'
> > at all. My argument is that there may also be _other_ submodule refs
> > that contain merges of 'B' and 'F' as well, and they should _also_ be
> > considered as valid candidates for the resolution in '5'. I would in
> > fact argue that you should traverse _all_ submodule refs (maybe even
> > including remote- tracking refs) to look for merges of 'B' and 'F'
> > [1], and present them all as equal alternatives.
> > 
> > Consider for example this submodule scenario:
> >         -----------G      [maint]
> >        /          /
> >    ---B--------  /        [feature_a]
> >   /    \       \/
> >  A--C---D---E  /\         [master]
> >      \     /  /  \
> >       ----F---    \       [feature_b]
> >               \    \
> >                --H--I--J  [next]
> > 
> > If there exist multiple merges that resolve 'B' and 'F' (in this case:
> > 'G', 'E' and 'I'), then all of those should be presented as equal
> > alternatives to the user.
> 
> You lost me completely here.
> 
> I thought you were going to argue that it would be an utterly wrong thing
> to suggest E or I as a probably resolution if the superproject merge that
> needs to merge superproject commits that binds B and F as its submodules
> is being done in the context of advance 'maint' track of the
> superproject.
> 
> Think of 'D' as a commit that corresponds to a major version bump point
> of the superproject; i.e. it introduces a major change to the submodule.
>  In the 'maintenance track' of the superproject for maintaining the
> previous version, you don't want to have any commit that has 'D' as an
> ancestor.
> 
> For an "automated" heuristics based on "find common descendants" to make
> sense, the branches you are merging have to share the common purpose, and
> you need to limit the common descendants you find to the ones that are
> compatible with the shared purpose.  The purpose of 'maintenance track'
> may be to maintain the previous version without dragging newer and more
> exciting things that happened in the later development.  In the above
> picture, G (that has nothing but B and F) is the only commit that can be
> safely assumed that two commits in the superproject space that bind B and
> F respectively can use as the submodule as their merge result.  E and I
> are contaminated with D and H whose purpose in the superproject space is
> unknown without further hint.

Yes, from a 'maint'-perspective, using G in the superproject probably makes 
more sense than using E or I. From a different superproject perspective, 
though, using E or I might make more sense. If, say, the superproject 
customarily follows the commits on the 'master' branch in the submodule, but 
the superproject has not yet gotten around to updating from A to C, D or E, 
then, by the time we do the superproject merge of Alice and Bob's branches, 
I would still say that using E is better than using G.

My argument is that without knowing the purpose of the superproject merge 
(which Git by itself _cannot_ know), Git should not prefer _any_ of these 
merges over the other, but must present them all as equal alternatives to 
the user.

Of course, the user has other alternatives as well, like creating a whole 
new merge in the submodule, or doing something completely different. But if 
existing submodule merges are to be considered valid alternatives, Git 
cannot pretend to know which of those merges are more suitable. It can only 
present them to the user, and then the user (after having examined the 
merges and their history relative to B and F) may choose the merge that 
matches the purpose of the superproject merge.


...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-20 23:06                             ` Johan Herland
@ 2010-06-21  0:03                               ` Junio C Hamano
  2010-06-21 10:19                                 ` Johan Herland
  0 siblings, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2010-06-21  0:03 UTC (permalink / raw)
  To: Johan Herland; +Cc: git, Jens Lehmann, Heiko Voigt

Johan Herland <johan@herland.net> writes:

>> For an "automated" heuristics based on "find common descendants" to make
>> sense, the branches you are merging have to share the common purpose, and
>> you need to limit the common descendants you find to the ones that are
>> compatible with the shared purpose.  The purpose of 'maintenance track'
>> may be to maintain the previous version without dragging newer and more
>> exciting things that happened in the later development.  In the above
>> picture, G (that has nothing but B and F) is the only commit that can be
>> safely assumed that two commits in the superproject space that bind B and
>> F respectively can use as the submodule as their merge result.  E and I
>> are contaminated with D and H whose purpose in the superproject space is
>> unknown without further hint.
>
> Yes, from a 'maint'-perspective, using G in the superproject probably makes 
> more sense than using E or I. From a different superproject perspective, 
> though, using E or I might make more sense.

Actually, what I was alluding to was that 'G' would be the _only_ commit
that may make sense (note that G may not necessarily make sense, but the
point is that we can say that others do _not_ make sense as alternatives)
if we know that the context of making the superproject merge is that it is
doing the 'maintenance track' merge.  Similarly, if we know that the merge
being done in the superproject is in the 'master' context, 'E' would be
the _only_ plausible candidate, similarly for 'I' in 'next' context.

It is further plausible to imagine that the .gitmodules file tracked in
the superproject's 'maint' branch can be used to express that 'maint'
branch of the submodule should be used.

If we revisit the Alice and Bob example with such an arrangement, if they
were working on their branches so that their results would be included in
the 'maint' track of the superproject, there won't be a merge conflict in
the .gitmodules file at the superproject level when their branch tips are
merged; we will know that the merged .gitmodules file will tell us that we
would want to follow 'maint' branch of the submodule.

Similarly if Alice were fixing a bug in 'maint' but Bob were advancing
features in 'master', then merging .gitmodules at the superproject level
will fast-forward at the path level (i.e. Alice didn't touch, but Bob
changed, so we take Bob's change), instructing us to follow 'master'
branch from the submodule automatically.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-21  0:03                               ` Junio C Hamano
@ 2010-06-21 10:19                                 ` Johan Herland
  2010-06-21 15:22                                   ` Junio C Hamano
  2010-06-23  7:38                                   ` Finn Arne Gangstad
  0 siblings, 2 replies; 32+ messages in thread
From: Johan Herland @ 2010-06-21 10:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jens Lehmann, Heiko Voigt

On Monday 21 June 2010, Junio C Hamano wrote:
> Johan Herland <johan@herland.net> writes:
> >> For an "automated" heuristics based on "find common descendants" to
> >> make sense, the branches you are merging have to share the common
> >> purpose, and you need to limit the common descendants you find to the
> >> ones that are compatible with the shared purpose.  The purpose of
> >> 'maintenance track' may be to maintain the previous version without
> >> dragging newer and more exciting things that happened in the later
> >> development.  In the above picture, G (that has nothing but B and F)
> >> is the only commit that can be safely assumed that two commits in the
> >> superproject space that bind B and F respectively can use as the
> >> submodule as their merge result.  E and I are contaminated with D and
> >> H whose purpose in the superproject space is unknown without further
> >> hint.
> > 
> > Yes, from a 'maint'-perspective, using G in the superproject probably
> > makes more sense than using E or I. From a different superproject
> > perspective, though, using E or I might make more sense.
> 
> Actually, what I was alluding to was that 'G' would be the _only_ commit
> that may make sense (note that G may not necessarily make sense, but the
> point is that we can say that others do _not_ make sense as alternatives)
> if we know that the context of making the superproject merge is that it
> is doing the 'maintenance track' merge.  Similarly, if we know that the
> merge being done in the superproject is in the 'master' context, 'E'
> would be the _only_ plausible candidate, similarly for 'I' in 'next'
> context.

Ah, so Git should automatically _eliminate_ other alternatives based on the 
fact that they are not compatible with the purpose of the superproject 
merge. Still, that requires Git to know the purpose of the superproject 
merge. Which it doesn't, AFAICS.

> It is further plausible to imagine that the .gitmodules file tracked in
> the superproject's 'maint' branch can be used to express that 'maint'
> branch of the submodule should be used.

Ok, so you want to create some kind of relationship between the 
superproject's 'maint' branch and the submodule's 'maint' branch. At this 
point we're almost back to the (magic IMHO) "stable" branch setting that 
Heiko alluded to in his initial patch series. Except, possibly, that instead 
of using the tip of that branch you'd use the first merge of B and F on that 
branch.

I still don't like this, as IMHO it's too subtle, and possibly conflicts 
with explicitly tracking submodule branches (which, to me, is a more 
important feature).

Or are you talking about outright tracking submodule branches (as proposed 
by Ævar in a different thread)? In that case, the issue changes completely:

If you're explicitly tracking the 'maint' branch in the submodule, then IMHO 
Git should always propose the tip of the submodule's 'maint' branch as the 
merge resolution in the superproject (possibly with a warning printed if 
that tip does not descend from both B and F).

> If we revisit the Alice and Bob example with such an arrangement, if they
> were working on their branches so that their results would be included in
> the 'maint' track of the superproject, there won't be a merge conflict in
> the .gitmodules file at the superproject level when their branch tips are
> merged; we will know that the merged .gitmodules file will tell us that
> we would want to follow 'maint' branch of the submodule.
>
> Similarly if Alice were fixing a bug in 'maint' but Bob were advancing
> features in 'master', then merging .gitmodules at the superproject level
> will fast-forward at the path level (i.e. Alice didn't touch, but Bob
> changed, so we take Bob's change), instructing us to follow 'master'
> branch from the submodule automatically.

Ok, so these are similar to Ævar's proposal (and its subsequent discussion) 
for explicitly tracking submodule branches. Still not sure if we're actually 
talking about the same thing, though.

...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-21 10:19                                 ` Johan Herland
@ 2010-06-21 15:22                                   ` Junio C Hamano
  2010-06-21 22:35                                     ` Johan Herland
  2010-06-23  7:38                                   ` Finn Arne Gangstad
  1 sibling, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2010-06-21 15:22 UTC (permalink / raw)
  To: Johan Herland; +Cc: git, Jens Lehmann, Heiko Voigt

Johan Herland <johan@herland.net> writes:

> I still don't like this, as IMHO it's too subtle, and possibly conflicts 
> with explicitly tracking submodule branches (which, to me, is a more 
> important feature).

If you mean, by "explicitly tracking", to say "I don't care which commit
from the submodule appears at this path, as long as it is at the tip of
this branch", I still don't think it makes much sense, but what I outlined
is not _incompatible_ with such a scheme.  In fact I think it would rather
fit naturally as a sanity/safety measure.

I presume that in your "explicitly tracked" world, if the user tries to
commit at the superproject level with a submodule commit that is
inconsistent with that "explicitly tracked" branch (e.g. the commit is not
reachable from the tip of that branch), you would issue a warning of some
sort, using that knowledge.  What I outlined uses the exact same knowledge
of which branch in the submodule the superproject branch is tied to to
reject irrelevant existing merges as resolution candidates.

Of course, this ".gitmodule in superproject can tell you which branch of
submodule it follows" is optional; the user needs to take responsibility
of picking the right one among I, E and G, of course, if the information
does not exist or is not available.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-21 15:22                                   ` Junio C Hamano
@ 2010-06-21 22:35                                     ` Johan Herland
  2010-06-22  4:04                                       ` Junio C Hamano
  0 siblings, 1 reply; 32+ messages in thread
From: Johan Herland @ 2010-06-21 22:35 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jens Lehmann, Heiko Voigt

On Monday 21 June 2010, Junio C Hamano wrote:
> Johan Herland <johan@herland.net> writes:
> > I still don't like this, as IMHO it's too subtle, and possibly
> > conflicts with explicitly tracking submodule branches (which, to me,
> > is a more important feature).
> 
> If you mean, by "explicitly tracking", to say "I don't care which commit
> from the submodule appears at this path, as long as it is at the tip of
> this branch", I still don't think it makes much sense, but what I
> outlined is not _incompatible_ with such a scheme.  In fact I think it
> would rather fit naturally as a sanity/safety measure.

I'll first try to explain where I'm coming from, to hopefully eliminate any 
confusion about my position:

IMHO, there should be 2 primary modes for submodules in Git:

A. Explicitly tracking submodule commits. This is the existing submodule 
behaviour. The superproject refers directly (in its tree) to a submodule 
commit. The .gitmodules file contains associated information 
(submodule.<name>.path, .url and .update).

B. Explicitly tracking submodule branches. An extra setting 
(submodule.<name>.branch) is added to the .gitmodules file, to determine 
which submodule branch to checkout. This setting overrides whatever 
submodule commit, if any, is stored in the superproject tree. There are two 
sub-modes for this mode:

B.1. There is no submodule entry at all in the superproject tree. This 
indicates that you are not at all interested in tracking the history of the 
submodule relative to the superproject. You are always interested in 
checking out the tip of submodule.<name>.branch in the submodule, even when 
digging into the superproject's history.

B.2. There is a submodule entry in the superproject tree. This "hybrid" 
approach indicates that although you primarily want to track a branch in the 
submodule (i.e. you mostly want the latest version of the submodule's 
branch), you still want a record of where your submodule has been pointing, 
in your superproject's history. Exactly when (or how often) the 
superproject's submodule entry should be updated is yet TBD. So is when to 
check out according to .branch, and when to use the recorded submodule 
commit. In the end, it'll probably be a policy decision for the projects 
that choose this approach.

Ok, that hopefully explains the basic idea of tracking submodule commits (A) 
vs. tracking submodule branches (B). Now, how would this apply to merging 
submodules?

In case of B, I'd argue that the submodule merging should _only_ look at the 
value of submodule.<name>.branch from the superproject's .gitmodules. If 
this setting is ambiguous (because of merge conflicts in .gitmodules), Git 
should not touch the submodule at all (until .gitmodules is resolved). 
Otherwise, Git's only task is to checkout whatever branch is specified by 
.gitmodules. If the commit that is checked out does not descend from all of 
the merge alternatives, a warning should be printed.

With that in mind, I enter this discussion because it might provide insight 
on how to solve the problem of merging submodules in scenario A.

In mode A there is no submodule.<name>.branch setting, and I would not like 
to add an additional setting (let's call it submodule.<name>.merge_branch 
for now) that is "weaker" than submodule.<name>.branch (meaning that it does 
not trigger the transition from mode A to mode B). There are two major 
reasons for this:

1. submodule.<name>.merge_branch would add semantics to the case of merging 
submodules that would be similar in spirit to what submodule.<name>.branch 
does (the "spirit" here is the special relationship to a submodule branch 
that we're establishing), but still the .merge_branch setting would be 
different in practice, by (a) only applying to the case of merging 
submodules (while .branch changes the semantics of almost all submodule 
operations), and (b) not even in the case of merging submodules would the 
options do the same thing. I fear the semantics of the .merge_branch option 
would be too complicated for an average user, and that its similarity to 
.branch would cause confusion.

2. What would happen if you enabled _both_ .merge_branch and .branch? In the 
case of merging submodules which setting will "win"? Even worse, if you set 
.merge_branch to "foo", and .branch to "bar", what will then happen?

Ok, so if I oppose adding .merge_branch, what do I propose instead?

Currently, not much, I'm afraid. But I have a gut feeling that the use case 
presented by Heiko and Jens is best solved EITHER by having no special 
branch relationships between the superproject and submodule (which AFAICS is 
what we currently agree on in this thread, and Heiko has already submitted a 
patch to this effect), OR by employing a conservative version of mode B.2 in 
which we use .branch to track a submodule branch, but still keep a close 
look at the recorded submodule commit, and, if necessary, maybe introduce 
some other options to tell Git when to use the recorded commit, and when to 
use the branch tip.

In other words, I think we should explore the .branch direction before we 
add complexity and potential confusion by prematurely adding another option 
that it somewhat similar to .branch in some contexts.

> I presume that in your "explicitly tracked" world, if the user tries to
> commit at the superproject level with a submodule commit that is
> inconsistent with that "explicitly tracked" branch (e.g. the commit is
> not reachable from the tip of that branch), you would issue a warning of
> some sort, using that knowledge.

Yes.

> What I outlined uses the exact same
> knowledge of which branch in the submodule the superproject branch is
> tied to to reject irrelevant existing merges as resolution candidates.

True, but as I've argued above, I'm not sure that adding another setting 
(aka. .merge_branch) for this special/limited kind of branch tracking is 
worth it.

> Of course, this ".gitmodule in superproject can tell you which branch of
> submodule it follows" is optional; the user needs to take responsibility
> of picking the right one among I, E and G, of course, if the information
> does not exist or is not available.

Yes, of course. And this corresponds to what I've proposed for scenario A, 
when there is no branch-related setting specified for the submodule.

Hope this helps,

...Johan

-- 
Johan Herland, <johan@herland.net>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-21 22:35                                     ` Johan Herland
@ 2010-06-22  4:04                                       ` Junio C Hamano
  2010-06-22 10:48                                         ` Johan Herland
  0 siblings, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2010-06-22  4:04 UTC (permalink / raw)
  To: Johan Herland; +Cc: git, Jens Lehmann, Heiko Voigt

Johan Herland <johan@herland.net> writes:

> True, but as I've argued above, I'm not sure that adding another setting 
> (aka. .merge_branch) for this special/limited kind of branch tracking is 
> worth it.

I don't think .merge_branch is necessary nor even desired.  In fact, I
think your use of .branch, especially in the variant that does not have
any submodule entry in the superproject tree, of your version (B) does not
have conceptual advantage.  You checkout the superproject first (which
would be the natural thing to do, as you may get update to its .gitmodules
there), and checkout the then-tip of the named branch of the submodule,
you would immediately get a stale checkout when you then go fetch the
updates to the submodule.

And the worst part is that you wouldn't even _notice_ that your checkout
is stale, as there is no record in the superproject which commit you were
supposed to be using to be consistent with the version the committer of
the superproject commit used to record it.

I on the other hand think what you called "hybrid" makes sense (and I
don't even think it is hybrid but rather is a natural way to do this).
With the submodule.*.branch entry, you can:

 - make sure that your checkout is consistent; if your submodule checks
   out a different commit or branch from what the superproject records in
   its tree or in its .gitmodules (e.g. you forgot to update the submodule
   when you switched superproject branch), git can notice the situation
   and can help you implement policy decisions;

 - record a commit that is different from the tip of the submodule branch
   when making a superproject commit; git can notice the situation and can
   help you implement policy decisions (e.g. you could choose to reject
   and tell the user to advance the submodule branch first before making
   the commit in the superproject);

 - use it as an advisory "existing merge commit selector", as discussed in
   this thread.

Thinking about what would happen in your (B) that doesn't record the exact
commit, I think that it doesn't have any advantage over the "hybrid" one.
The "hybrid" one can help you to make sure that what you commit in the
superproject's .gitmodules and submodule's branch tip are kept consistent.
When they are kept consistent, then switching branches in the superproject
should always flip between the tips of branches, no?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-22  4:04                                       ` Junio C Hamano
@ 2010-06-22 10:48                                         ` Johan Herland
  0 siblings, 0 replies; 32+ messages in thread
From: Johan Herland @ 2010-06-22 10:48 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Jens Lehmann, Heiko Voigt

On Tuesday 22 June 2010, Junio C Hamano wrote:
> Johan Herland <johan@herland.net> writes:
> > True, but as I've argued above, I'm not sure that adding another
> > setting (aka. .merge_branch) for this special/limited kind of
> > branch tracking is worth it.
>
> I don't think .merge_branch is necessary nor even desired.

Good.

> In fact, 
> I think your use of .branch, especially in the variant that does not
> have any submodule entry in the superproject tree, of your version
> (B) does not have conceptual advantage.  You checkout the
> superproject first (which would be the natural thing to do, as you
> may get update to its .gitmodules there), and checkout the then-tip
> of the named branch of the submodule, you would immediately get a
> stale checkout when you then go fetch the updates to the submodule.

Not if the initial checkout of the submodule included an implicit 
fetch/pull within the submodule (exact behaviour probably to be 
controlled with some config).

> And the worst part is that you wouldn't even _notice_ that your
> checkout is stale, as there is no record in the superproject which
> commit you were supposed to be using to be consistent with the
> version the committer of the superproject commit used to record it.

The idea (in B.1.) is that if you _truly_ don't care which version of 
the submodule you're checking out, then there should be no submodule 
entry in the superproject that would "pollute" your status/diffs.

Note that I'm not in this camp myself, as I would very much like to keep 
track of where my submodule is (and has been) checked out. So I'll stop 
trying to argue for a use case that I'm not going to use. At the time, 
it seemed like a logical conclusion of the tracking-submodule-branches 
debate, but if it's not going to be used in practice, there's no point 
in keeping it alive.

> I on the other hand think what you called "hybrid" makes sense (and I
> don't even think it is hybrid but rather is a natural way to do
> this).

Agreed. I too believe that the "hybrid" is much more useful in practice 
than the extreme version (without a gitlink entry in the superproject).

But I also believe that Git shouldn't enforce specific workflows, so 
that if people actually want to track submodule branches in the 
non-"hybrid" (haphazard) manner, then Git should not stand in their 
way.

On the other hand, if nobody's gonna do this, there's no point in 
implementing support for it.

> With the submodule.*.branch entry, you can: 
>
>  - make sure that your checkout is consistent; if your submodule
> checks out a different commit or branch from what the superproject
> records in its tree or in its .gitmodules (e.g. you forgot to update
> the submodule when you switched superproject branch), git can notice
> the situation and can help you implement policy decisions;
>
>  - record a commit that is different from the tip of the submodule
> branch when making a superproject commit; git can notice the
> situation and can help you implement policy decisions (e.g. you could
> choose to reject and tell the user to advance the submodule branch
> first before making the commit in the superproject);
>
>  - use it as an advisory "existing merge commit selector", as
> discussed in this thread.

Ah, I see. IINM you indeed prefer to use the same setting (aka. 
submodule.<name>.branch) for controlling both the merging of submodules 
(as discussed in this thread), and the other aspects of the submodule 
branch tracking feature. We agree here.

However, it seems you would like the .branch setting to be more advisory 
in nature: Instead of blindly checking out whatever .branch specifies, 
you'd rather have a more careful interplay where the .branch option 
should only check whether it is consistent with what happens to be 
checked out in the submodule, and warn when this is not the case.

This may be better than what I suggested, but it's hard to say anything 
for sure until the various alternatives are tested in practice.

> Thinking about what would happen in your (B) that doesn't record the
> exact commit, I think that it doesn't have any advantage over the
> "hybrid" one. The "hybrid" one can help you to make sure that what
> you commit in the superproject's .gitmodules and submodule's branch
> tip are kept consistent. When they are kept consistent, then
> switching branches in the superproject should always flip between the
> tips of branches, no?

Yes, if branch "foo" in the superproject records "branch = subfoo" 
in .gitmodules, and the current tip of "subfoo" as a gitlink, and 
branch "bar" in the superproject records "branch = subbar" 
in .gitmodules and the current tip of "subbar" as a gitlink, then, 
indeed, switching between these two branches should auto-flip between 
the branch tips.

One of the remaining questions is what happens when the superproject 
does not change, but commits are added to the submodule branches. Now, 
when switching between branches in the superproject, should Git:

1. Do nothing (this is the current behaviour, the user is forced to 'git 
submodule update' after 'git checkout' in the superproject)

2. Only checkout the recorded gitlink (this defeats the purpose of 
branch-tracking, IMO)

3. Checkout the recorded gitlink, but warn about the updated branch tip 
(a valid behaviour IMO)

4. Checkout the updated branch tip of the submodule, and warn about the 
out-of-date gitlink entry (also a valid behaviour IMO)

5. Checkout the updated branch tip AND stage a new gitlink in the 
superproject (this is a bit to "magic", IMO)

...Johan

-- 
Johan Herland, <jherland@gmail.com>
www.herland.net

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [WIP PATCH 0/3] implement merge strategy for submodule links
  2010-06-21 10:19                                 ` Johan Herland
  2010-06-21 15:22                                   ` Junio C Hamano
@ 2010-06-23  7:38                                   ` Finn Arne Gangstad
  1 sibling, 0 replies; 32+ messages in thread
From: Finn Arne Gangstad @ 2010-06-23  7:38 UTC (permalink / raw)
  To: Johan Herland; +Cc: Junio C Hamano, git, Jens Lehmann, Heiko Voigt

On Mon, Jun 21, 2010 at 12:19:02PM +0200, Johan Herland wrote:
> 
> Ah, so Git should automatically _eliminate_ other alternatives based on the 
> fact that they are not compatible with the purpose of the superproject 
> merge. Still, that requires Git to know the purpose of the superproject 
> merge. Which it doesn't, AFAICS.

This appears to be a good time to resuscitate an idea we had a while ago:

Why not make merging recursive wrt submodules?

I have been trying for some time to figure out how to use git for some
huge projects that consist of hundreds of more or less tightly coupled
modules. Some of these modules are used for multiple projects, and
taken together they are too large to fit well into a single repository.

I wouuld like to be able to use submodules to achieve several things:

1. Partial clones/checkouts - clone only the modules you need
2. Make git behave for projects that are too large for a single repo
3. Bonus: share some modules between muptiple projects

Now - for each superproject I would like this to behave as closely as
possible to working with a single repo, so for merging this would mean
that any merge from toplevel would automatically do the merge in all
affected submodules as well. Just merge the sha1's directly, don't try
to be clever - merge as if it was a subdirectory.

- Finn Arne

^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2010-06-23  7:38 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-11 12:23 [WIP PATCH 0/3] implement merge strategy for submodule links Heiko Voigt
2010-06-11 12:23 ` [WIP PATCH 1/3] extend ref iteration for submodules Heiko Voigt
2010-06-11 12:23 ` [WIP PATCH 2/3] add missing && to submodule-merge testcase Heiko Voigt
2010-06-11 12:23 ` [WIP PATCH 3/3] implement automatic fast forward merge for submodules Heiko Voigt
2010-06-12 10:12 ` [WIP PATCH 0/3] implement merge strategy for submodule links Johan Herland
2010-06-12 12:06   ` Heiko Voigt
2010-06-13 17:59     ` Johan Herland
2010-06-14 17:02       ` Heiko Voigt
2010-06-14 23:59         ` Johan Herland
2010-06-15 17:37           ` Jens Lehmann
2010-06-16  0:05             ` Johan Herland
2010-06-16 17:16               ` Jens Lehmann
2010-06-16 21:32                 ` Johan Herland
2010-06-16 22:11                   ` Junio C Hamano
2010-06-17  0:39                     ` Johan Herland
2010-06-17 21:13                       ` Jens Lehmann
2010-06-18  9:40                         ` Johan Herland
2010-06-18 13:55                           ` Jens Lehmann
2010-06-19  9:43                             ` Heiko Voigt
2010-06-19 15:54                               ` Jens Lehmann
2010-06-19 10:17                           ` Heiko Voigt
2010-06-19 13:15                             ` Johan Herland
2010-06-19 15:52                               ` [WIP PATCH 3/3] implement automatic fast forward merge for submodules Heiko Voigt
2010-06-20 18:04                           ` [WIP PATCH 0/3] implement merge strategy for submodule links Junio C Hamano
2010-06-20 23:06                             ` Johan Herland
2010-06-21  0:03                               ` Junio C Hamano
2010-06-21 10:19                                 ` Johan Herland
2010-06-21 15:22                                   ` Junio C Hamano
2010-06-21 22:35                                     ` Johan Herland
2010-06-22  4:04                                       ` Junio C Hamano
2010-06-22 10:48                                         ` Johan Herland
2010-06-23  7:38                                   ` Finn Arne Gangstad

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).