git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] commit-reach: avoid commit_list_insert_by_date()
@ 2025-10-24 16:47 René Scharfe
  0 siblings, 0 replies; only message in thread
From: René Scharfe @ 2025-10-24 16:47 UTC (permalink / raw)
  To: Git List

Building a list using commit_list_insert_by_date() has quadratic worst
case complexity.  Avoid it by just appending in the loop and sorting at
the end.

The number of merge bases is usually small, so don't expect speedups in
normal repositories.  It has no limit, though.  The added perf test
shows a nice improvement when dealing with 16384 merge bases:

Test                     v2.51.1           HEAD
-----------------------------------------------------------------
6010.2: git merge-base   0.55(0.54+0.00)   0.03(0.02+0.00) -94.5%

Signed-off-by: René Scharfe <l.s.r@web.de>
---
Patch formatted with --function-context for easier review, particularly
of paint_down_to_common().

 commit-reach.c             |  14 +++--
 t/perf/p6010-merge-base.sh | 101 +++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+), 5 deletions(-)
 create mode 100755 t/perf/p6010-merge-base.sh

diff --git a/commit-reach.c b/commit-reach.c
index a339e41aa4..cc18c86d3b 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -52,127 +52,130 @@ static int queue_has_nonstale(struct prio_queue *queue)
 /* all input commits in one and twos[] must have been parsed! */
 static int paint_down_to_common(struct repository *r,
 				struct commit *one, int n,
 				struct commit **twos,
 				timestamp_t min_generation,
 				int ignore_missing_commits,
 				struct commit_list **result)
 {
 	struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
 	int i;
 	timestamp_t last_gen = GENERATION_NUMBER_INFINITY;
+	struct commit_list **tail = result;
 
 	if (!min_generation && !corrected_commit_dates_enabled(r))
 		queue.compare = compare_commits_by_commit_date;
 
 	one->object.flags |= PARENT1;
 	if (!n) {
 		commit_list_append(one, result);
 		return 0;
 	}
 	prio_queue_put(&queue, one);
 
 	for (i = 0; i < n; i++) {
 		twos[i]->object.flags |= PARENT2;
 		prio_queue_put(&queue, twos[i]);
 	}
 
 	while (queue_has_nonstale(&queue)) {
 		struct commit *commit = prio_queue_get(&queue);
 		struct commit_list *parents;
 		int flags;
 		timestamp_t generation = commit_graph_generation(commit);
 
 		if (min_generation && generation > last_gen)
 			BUG("bad generation skip %"PRItime" > %"PRItime" at %s",
 			    generation, last_gen,
 			    oid_to_hex(&commit->object.oid));
 		last_gen = generation;
 
 		if (generation < min_generation)
 			break;
 
 		flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
 		if (flags == (PARENT1 | PARENT2)) {
 			if (!(commit->object.flags & RESULT)) {
 				commit->object.flags |= RESULT;
-				commit_list_insert_by_date(commit, result);
+				tail = commit_list_append(commit, tail);
 			}
 			/* Mark parents of a found merge stale */
 			flags |= STALE;
 		}
 		parents = commit->parents;
 		while (parents) {
 			struct commit *p = parents->item;
 			parents = parents->next;
 			if ((p->object.flags & flags) == flags)
 				continue;
 			if (repo_parse_commit(r, p)) {
 				clear_prio_queue(&queue);
 				free_commit_list(*result);
 				*result = NULL;
 				/*
 				 * At this stage, we know that the commit is
 				 * missing: `repo_parse_commit()` uses
 				 * `OBJECT_INFO_DIE_IF_CORRUPT` and therefore
 				 * corrupt commits would already have been
 				 * dispatched with a `die()`.
 				 */
 				if (ignore_missing_commits)
 					return 0;
 				return error(_("could not parse commit %s"),
 					     oid_to_hex(&p->object.oid));
 			}
 			p->object.flags |= flags;
 			prio_queue_put(&queue, p);
 		}
 	}
 
 	clear_prio_queue(&queue);
+	commit_list_sort_by_date(result);
 	return 0;
 }
 
 static int merge_bases_many(struct repository *r,
 			    struct commit *one, int n,
 			    struct commit **twos,
 			    struct commit_list **result)
 {
-	struct commit_list *list = NULL;
+	struct commit_list *list = NULL, **tail = result;
 	int i;
 
 	for (i = 0; i < n; i++) {
 		if (one == twos[i]) {
 			/*
 			 * We do not mark this even with RESULT so we do not
 			 * have to clean it up.
 			 */
 			*result = commit_list_insert(one, result);
 			return 0;
 		}
 	}
 
 	if (!one)
 		return 0;
 	if (repo_parse_commit(r, one))
 		return error(_("could not parse commit %s"),
 			     oid_to_hex(&one->object.oid));
 	for (i = 0; i < n; i++) {
 		if (!twos[i])
 			return 0;
 		if (repo_parse_commit(r, twos[i]))
 			return error(_("could not parse commit %s"),
 				     oid_to_hex(&twos[i]->object.oid));
 	}
 
 	if (paint_down_to_common(r, one, n, twos, 0, 0, &list)) {
 		free_commit_list(list);
 		return -1;
 	}
 
 	while (list) {
 		struct commit *commit = pop_commit(&list);
 		if (!(commit->object.flags & STALE))
-			commit_list_insert_by_date(commit, result);
+			tail = commit_list_append(commit, tail);
 	}
+	commit_list_sort_by_date(result);
 	return 0;
 }
 
@@ -421,47 +424,48 @@ static int remove_redundant(struct repository *r, struct commit **array,
 static int get_merge_bases_many_0(struct repository *r,
 				  struct commit *one,
 				  size_t n,
 				  struct commit **twos,
 				  int cleanup,
 				  struct commit_list **result)
 {
-	struct commit_list *list;
+	struct commit_list *list, **tail = result;
 	struct commit **rslt;
 	size_t cnt, i;
 	int ret;
 
 	if (merge_bases_many(r, one, n, twos, result) < 0)
 		return -1;
 	for (i = 0; i < n; i++) {
 		if (one == twos[i])
 			return 0;
 	}
 	if (!*result || !(*result)->next) {
 		if (cleanup) {
 			clear_commit_marks(one, all_flags);
 			clear_commit_marks_many(n, twos, all_flags);
 		}
 		return 0;
 	}
 
 	/* There are more than one */
 	cnt = commit_list_count(*result);
 	CALLOC_ARRAY(rslt, cnt);
 	for (list = *result, i = 0; list; list = list->next)
 		rslt[i++] = list->item;
 	free_commit_list(*result);
 	*result = NULL;
 
 	clear_commit_marks(one, all_flags);
 	clear_commit_marks_many(n, twos, all_flags);
 
 	ret = remove_redundant(r, rslt, cnt, &cnt);
 	if (ret < 0) {
 		free(rslt);
 		return -1;
 	}
 	for (i = 0; i < cnt; i++)
-		commit_list_insert_by_date(rslt[i], result);
+		tail = commit_list_append(rslt[i], tail);
+	commit_list_sort_by_date(result);
 	free(rslt);
 	return 0;
 }
diff --git a/t/perf/p6010-merge-base.sh b/t/perf/p6010-merge-base.sh
new file mode 100755
index 0000000000..54f52fa23e
--- /dev/null
+++ b/t/perf/p6010-merge-base.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+test_description='Test git merge-base'
+
+. ./perf-lib.sh
+
+test_perf_fresh_repo
+
+#
+# Creates lots of merges to make history traversal costly.  In
+# particular it creates 2^($max_level-1)-1 2-way merges on top of
+# 2^($max_level-1) root commits.  E.g., the commit history looks like
+# this for a $max_level of 3:
+#
+#     _1_
+#    /   \
+#   2     3
+#  / \   / \
+# 4   5 6   7
+#
+# The numbers are the fast-import marks, which also are the commit
+# messages.  1 is the HEAD commit and a merge, 2 and 3 are also merges,
+# 4-7 are the root commits.
+#
+build_history () {
+	local max_level="$1" &&
+	local level="${2:-1}" &&
+	local mark="${3:-1}" &&
+	if test $level -eq $max_level
+	then
+		echo "reset refs/heads/master" &&
+		echo "from $ZERO_OID" &&
+		echo "commit refs/heads/master" &&
+		echo "mark :$mark" &&
+		echo "committer C <c@example.com> 1234567890 +0000" &&
+		echo "data <<EOF" &&
+		echo "$mark" &&
+		echo "EOF"
+	else
+		local level1=$((level+1)) &&
+		local mark1=$((2*mark)) &&
+		local mark2=$((2*mark+1)) &&
+		build_history $max_level $level1 $mark1 &&
+		build_history $max_level $level1 $mark2 &&
+		echo "commit refs/heads/master" &&
+		echo "mark :$mark" &&
+		echo "committer C <c@example.com> 1234567890 +0000" &&
+		echo "data <<EOF" &&
+		echo "$mark" &&
+		echo "EOF" &&
+		echo "from :$mark1" &&
+		echo "merge :$mark2"
+	fi
+}
+
+#
+# Creates a new merge history in the same shape as build_history does,
+# while reusing the same root commits.  This way the two top commits
+# have 2^($max_level-1) merge bases between them.
+#
+build_history2 () {
+	local max_level="$1" &&
+	local level="${2:-1}" &&
+	local mark="${3:-1}" &&
+	if test $level -lt $max_level
+	then
+		local level1=$((level+1)) &&
+		local mark1=$((2*mark)) &&
+		local mark2=$((2*mark+1)) &&
+		build_history2 $max_level $level1 $mark1 &&
+		build_history2 $max_level $level1 $mark2 &&
+		echo "commit refs/heads/master" &&
+		echo "mark :$mark" &&
+		echo "committer C <c@example.com> 1234567890 +0000" &&
+		echo "data <<EOF" &&
+		echo "$mark II" &&
+		echo "EOF" &&
+		echo "from :$mark1" &&
+		echo "merge :$mark2"
+	fi
+}
+
+test_expect_success 'setup' '
+	max_level=15 &&
+	build_history $max_level | git fast-import --export-marks=marks &&
+	git tag one &&
+	build_history2 $max_level | git fast-import --import-marks=marks --force &&
+	git tag two &&
+	git gc &&
+	git log --format=%H --no-merges >expect
+'
+
+test_perf 'git merge-base' '
+	git merge-base --all one two >actual
+'
+
+test_expect_success 'verify result' '
+	test_cmp expect actual
+'
+
+test_done
-- 
2.51.1

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2025-10-24 16:47 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-24 16:47 [PATCH] commit-reach: avoid commit_list_insert_by_date() René Scharfe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).