* [PATCH v2 1/2] commit-reach: handle cycles in contains walk
2026-06-09 2:36 [PATCH v2 0/2] Reuse --contains traversal results Tamir Duberstein
@ 2026-06-09 2:36 ` Tamir Duberstein
2026-06-09 2:36 ` [PATCH v2 2/2] ref-filter: memoize --contains with generations Tamir Duberstein
1 sibling, 0 replies; 3+ messages in thread
From: Tamir Duberstein @ 2026-06-09 2:36 UTC (permalink / raw)
To: git
Cc: Jeff King, Karthik Nayak, Junio C Hamano, Victoria Dye,
Derrick Stolee, Elijah Newren, Tamir Duberstein
git tag --contains uses a memoized traversal that assumes commit
ancestry is acyclic. Replacement refs can violate that assumption,
causing the traversal to revisit a commit already on its stack
indefinitely.
Mark commits while they are active. If the traversal encounters an
active commit, discard the cache because it cannot distinguish answers
produced by the interrupted walk. Then fall back to the cycle-safe
reachability walk for that candidate.
Signed-off-by: Tamir Duberstein <tamird@gmail.com>
---
commit-reach.c | 30 ++++++++++++++++++++++++++----
commit-reach.h | 3 ++-
t/t7004-tag.sh | 21 +++++++++++++++++++++
3 files changed, 49 insertions(+), 5 deletions(-)
diff --git a/commit-reach.c b/commit-reach.c
index 9b3ea46d6f..65b618959b 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -708,7 +708,8 @@ static int in_commit_list(const struct commit_list *want, struct commit *c)
/*
* Test whether the candidate is contained in the list.
- * Do not recurse to find out, though, but return -1 if inconclusive.
+ * Do not recurse to find out, though, but return CONTAINS_UNKNOWN if
+ * inconclusive.
*/
static enum contains_result contains_test(struct commit *candidate,
const struct commit_list *want,
@@ -744,7 +745,7 @@ static void push_to_contains_stack(struct commit *candidate, struct contains_sta
}
static enum contains_result contains_tag_algo(struct commit *candidate,
- const struct commit_list *want,
+ struct commit_list *want,
struct contains_cache *cache)
{
struct contains_stack contains_stack = { 0, 0, NULL };
@@ -765,6 +766,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
if (result != CONTAINS_UNKNOWN)
return result;
+ *contains_cache_at(cache, candidate) = CONTAINS_IN_PROGRESS;
push_to_contains_stack(candidate, &contains_stack);
while (contains_stack.nr) {
struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1];
@@ -776,8 +778,8 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
contains_stack.nr--;
}
/*
- * If we just popped the stack, parents->item has been marked,
- * therefore contains_test will return a meaningful yes/no.
+ * A parent may have just been popped and marked, or may still
+ * be active when replacement refs create a cycle.
*/
else switch (contains_test(parents->item, want, cache, cutoff)) {
case CONTAINS_YES:
@@ -787,13 +789,33 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
case CONTAINS_NO:
entry->parents = parents->next;
break;
+ case CONTAINS_IN_PROGRESS:
+ /*
+ * Partial negative answers are not safe across a cycle.
+ * Discard them and use the cycle-safe reachability walk.
+ */
+ goto cycle;
case CONTAINS_UNKNOWN:
+ *contains_cache_at(cache, parents->item) =
+ CONTAINS_IN_PROGRESS;
push_to_contains_stack(parents->item, &contains_stack);
break;
}
}
free(contains_stack.contains_stack);
return contains_test(candidate, want, cache, cutoff);
+
+cycle:
+ free(contains_stack.contains_stack);
+ clear_contains_cache(cache);
+ init_contains_cache(cache);
+
+ result = repo_is_descendant_of(the_repository, candidate, want);
+ if (result < 0)
+ exit(128);
+ *contains_cache_at(cache, candidate) =
+ result ? CONTAINS_YES : CONTAINS_NO;
+ return result ? CONTAINS_YES : CONTAINS_NO;
}
int commit_contains(struct ref_filter *filter, struct commit *commit,
diff --git a/commit-reach.h b/commit-reach.h
index 3f3a563d8a..f908d305b1 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -73,7 +73,8 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid);
enum contains_result {
CONTAINS_UNKNOWN = 0,
CONTAINS_NO,
- CONTAINS_YES
+ CONTAINS_YES,
+ CONTAINS_IN_PROGRESS
};
define_commit_slab(contains_cache, enum contains_result);
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index d918005dd9..1ed91bb66e 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1611,6 +1611,27 @@ test_expect_success 'checking that first commit is in all tags (hash)' '
test_cmp expected actual
'
+test_expect_success 'tag --contains handles cyclic replacement histories' '
+ first=$(git rev-parse HEAD~2) &&
+ second=$(git rev-parse HEAD~) &&
+ third=$(git rev-parse HEAD) &&
+ test_when_finished "
+ git replace -d $first
+ git replace -d $third
+ git tag -d cycle-a cycle-b
+ " &&
+ git tag cycle-a "$first" &&
+ git tag cycle-b "$third" &&
+ git replace --graft "$first" "$third" "$second" &&
+ git replace --graft "$third" "$first" &&
+ cat >expected <<-\EOF &&
+ cycle-a
+ cycle-b
+ EOF
+ git tag --contains="$second" --list "cycle-*" >actual &&
+ test_cmp expected actual
+'
+
# other ways of specifying the commit
test_expect_success 'checking that first commit is in all tags (tag)' '
cat >expected <<-\EOF &&
--
2.54.0.501.g0fb508de08
^ permalink raw reply related [flat|nested] 3+ messages in thread* [PATCH v2 2/2] ref-filter: memoize --contains with generations
2026-06-09 2:36 [PATCH v2 0/2] Reuse --contains traversal results Tamir Duberstein
2026-06-09 2:36 ` [PATCH v2 1/2] commit-reach: handle cycles in contains walk Tamir Duberstein
@ 2026-06-09 2:36 ` Tamir Duberstein
1 sibling, 0 replies; 3+ messages in thread
From: Tamir Duberstein @ 2026-06-09 2:36 UTC (permalink / raw)
To: git
Cc: Jeff King, Karthik Nayak, Junio C Hamano, Victoria Dye,
Derrick Stolee, Elijah Newren, Tamir Duberstein
git branch and git for-each-ref call repo_is_descendant_of() for
each candidate selected by --contains or --no-contains. Each call
starts a new graph walk, so refs with shared history repeatedly
traverse the same commits.
ffc4b8012d (tag: speed up --contains calculation, 2011-06-11)
introduced a depth-first walk for git tag that caches positive and
negative answers across candidates. ee2bd06b0f (ref-filter: implement
'--contains' option, 2015-07-07) preserved both implementations when
ref-filter learned --contains.
The memoized walk is not always faster. Without generation numbers,
a negative check can walk to the root even when the breadth-first
merge-base walk finds a nearby divergence. With generation numbers,
the depth-first walk can stop below the oldest target while still
reusing answers across candidates.
Keep the existing memoized selection for git tag. Select it for other
ref-filter callers when generation numbers are enabled, and retain
the breadth-first walk otherwise.
When generation numbers are unavailable, repo_is_descendant_of() can
return -1 if ancestry cannot be read. The ref-filter Boolean interface
treated that error as a match. Check it and exit instead. The memoized
path already dies on the same parse failure, so both selected paths now
fail rather than return a result.
Add p1500 cases for up to 8,192 packed refs along one first-parent
history and for sibling refs near the tip with generation numbers
forced off.
On a checkout with 62,174 remote-tracking refs and generation numbers
enabled, I ran:
hyperfine --warmup 0 --runs 3 \
--command-name parent \
'"$parent" branch -r --contains c78ae85f3ce7e >/dev/null' \
--command-name this-commit \
'"$this" branch -r --contains c78ae85f3ce7e >/dev/null'
The results were:
parent this commit
elapsed 104.365 s 467.7 ms
user 93.702 s 220.2 ms
system 0.723 s 182.7 ms
The wall-time standard deviations were 11.356 seconds and 133.8
milliseconds, respectively. Separate runs without redirection produced
the same output with SHA-256
2466f6e2b72aa16b1a2126eddb81c8a1b2764ee251204ac034c191a925aa896f.
Both revisions were built with the default -O2 flags using Apple
clang 21.0.0 on macOS 26.5. The machine was a MacBook Pro (Mac16,6)
with a 16-core Apple M4 Max (12 performance and four efficiency
cores) and 128 GB RAM.
Link: https://lore.kernel.org/git/1445163904-24611-1-git-send-email-Karthik.188@gmail.com/
Link: https://lore.kernel.org/r/20230324191009.GA536967@coredump.intra.peff.net
Link: https://lore.kernel.org/git/20260527070510.3510836-1-krka@spotify.com/
Link: https://lore.kernel.org/r/20260608223430.GA340696@coredump.intra.peff.net
Suggested-by: Jeff King <peff@peff.net>
Signed-off-by: Tamir Duberstein <tamird@gmail.com>
---
commit-reach.c | 13 +++++++++--
commit-reach.h | 7 ++++++
t/perf/p1500-graph-walks.sh | 49 +++++++++++++++++++++++++++++++++++++++++-
t/t6301-for-each-ref-errors.sh | 22 +++++++++++++++++++
4 files changed, 88 insertions(+), 3 deletions(-)
diff --git a/commit-reach.c b/commit-reach.c
index 65b618959b..83a48004ef 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -821,9 +821,18 @@ static enum contains_result contains_tag_algo(struct commit *candidate,
int commit_contains(struct ref_filter *filter, struct commit *commit,
struct commit_list *list, struct contains_cache *cache)
{
- if (filter->with_commit_tag_algo)
+ int result;
+
+ if (!list)
+ return 1;
+ if (filter->with_commit_tag_algo ||
+ generation_numbers_enabled(the_repository))
return contains_tag_algo(commit, list, cache) == CONTAINS_YES;
- return repo_is_descendant_of(the_repository, commit, list);
+
+ result = repo_is_descendant_of(the_repository, commit, list);
+ if (result < 0)
+ exit(128);
+ return result;
}
int can_all_from_reach_with_flag(struct object_array *from,
diff --git a/commit-reach.h b/commit-reach.h
index f908d305b1..da6796a354 100644
--- a/commit-reach.h
+++ b/commit-reach.h
@@ -79,6 +79,13 @@ enum contains_result {
define_commit_slab(contains_cache, enum contains_result);
+/*
+ * Return whether "commit" is a descendant of any commit in "list". An empty
+ * list matches.
+ *
+ * The memoized traversal records answers in "cache" for one fixed "list".
+ * Clear it before changing the list.
+ */
int commit_contains(struct ref_filter *filter, struct commit *commit,
struct commit_list *list, struct contains_cache *cache);
diff --git a/t/perf/p1500-graph-walks.sh b/t/perf/p1500-graph-walks.sh
index 5b23ce5db9..99b54e274b 100755
--- a/t/perf/p1500-graph-walks.sh
+++ b/t/perf/p1500-graph-walks.sh
@@ -32,12 +32,47 @@ test_expect_success 'setup' '
echo "X:$line" >>test-tool-tags || return 1
done &&
- commit=$(git commit-tree $(git rev-parse HEAD^{tree})) &&
+ git rev-list --first-parent --max-count=8192 HEAD >contains-commits &&
+ test_file_not_empty contains-commits &&
+ git update-ref refs/contains-perf-base "$(tail -n 1 contains-commits)" &&
+ awk "{
+ printf \"update refs/contains-perf/%04d %s\\n\", NR, \$1
+ }" contains-commits |
+ git update-ref --stdin &&
+ git pack-refs --include "refs/contains-perf/*" &&
+
+ tree=$(git rev-parse HEAD^{tree}) &&
+ base=$(git rev-parse HEAD) &&
+ target=$(echo target | git commit-tree "$tree" -p "$base") &&
+ git update-ref refs/contains-diverged/target "$target" &&
+ for i in $(test_seq 1 4)
+ do
+ commit=$(echo candidate-$i |
+ git commit-tree "$tree" -p "$base") &&
+ git update-ref refs/contains-diverged/candidate-$i "$commit" ||
+ return 1
+ done &&
+
+ commit=$(git commit-tree "$tree") &&
git update-ref refs/heads/disjoint-base $commit &&
git commit-graph write --reachable
'
+test_expect_success 'verify contains results' '
+ git for-each-ref --contains=refs/contains-perf-base \
+ refs/contains-perf/ >actual &&
+ test_line_count = $(wc -l <contains-commits) actual &&
+
+ echo refs/contains-diverged/target >expect &&
+ GIT_TEST_COMMIT_GRAPH=0 \
+ git -c core.commitGraph=false for-each-ref \
+ --format="%(refname)" \
+ --contains=refs/contains-diverged/target \
+ refs/contains-diverged/ >actual &&
+ test_cmp expect actual
+'
+
test_perf 'ahead-behind counts: git for-each-ref' '
git for-each-ref --format="%(ahead-behind:HEAD)" --stdin <refs
'
@@ -62,6 +97,18 @@ test_perf 'contains: git tag --merged' '
xargs git tag --merged=HEAD <tags
'
+test_perf 'contains: git for-each-ref --contains' '
+ git for-each-ref --contains=refs/contains-perf-base \
+ refs/contains-perf/ >/dev/null
+'
+
+test_perf 'contains without generations: divergent refs' '
+ GIT_TEST_COMMIT_GRAPH=0 \
+ git -c core.commitGraph=false for-each-ref \
+ --contains=refs/contains-diverged/target \
+ refs/contains-diverged/ >/dev/null
+'
+
test_perf 'is-base check: test-tool reach (refs)' '
test-tool reach get_branch_base_for_tip <test-tool-refs
'
diff --git a/t/t6301-for-each-ref-errors.sh b/t/t6301-for-each-ref-errors.sh
index e06feb06e9..72b27c8be3 100755
--- a/t/t6301-for-each-ref-errors.sh
+++ b/t/t6301-for-each-ref-errors.sh
@@ -52,6 +52,28 @@ test_expect_success 'Missing objects are reported correctly' '
test_must_be_empty brief-err
'
+test_expect_success 'missing ancestors are reported by contains filters' '
+ test_when_finished "git update-ref -d refs/heads/missing-parent" &&
+ {
+ echo "tree $(git rev-parse HEAD^{tree})" &&
+ echo "parent $MISSING" &&
+ git cat-file commit HEAD |
+ sed -n -e "/^author /p" -e "/^committer /p" &&
+ echo &&
+ echo "missing parent"
+ } >commit &&
+ broken=$(git hash-object -t commit -w commit) &&
+ git update-ref refs/heads/missing-parent "$broken" &&
+ for option in --contains --no-contains
+ do
+ test_must_fail git for-each-ref "$option=HEAD" \
+ refs/heads/missing-parent >out 2>err &&
+ test_must_be_empty out &&
+ test_grep "parse commit $MISSING" err ||
+ return 1
+ done
+'
+
test_expect_success 'ahead-behind requires an argument' '
test_must_fail git for-each-ref \
--format="%(ahead-behind)" 2>err &&
--
2.54.0.501.g0fb508de08
^ permalink raw reply related [flat|nested] 3+ messages in thread