* Re: [PATCH v5 0/4] history: add squash subcommand to fold a range
From: Phillip Wood @ 2026-06-26 13:12 UTC (permalink / raw)
To: Harald Nordgren, phillip.wood
Cc: Harald Nordgren via GitGitGadget, git, Patrick Steinhardt
In-Reply-To: <CAHwyqnWXaG1HGunztVgUdWnVogqCHRbxh8pcS5fGA6f3mB-nEA@mail.gmail.com>
On 26/06/2026 10:57, Harald Nordgren wrote:
> On Fri, Jun 26, 2026 at 10:53 AM Phillip Wood <phillip.wood123@gmail.com> wrote:>> >> Only accepting a single argument is quite limiting as one
>> cannot say
>>
>> git history squash ^:/base :/tip
>
> I don't understand why this is limiting? It thought it was clear that
> it should be one argument REF1..REF2 ? What does '^:/base :/tip'
> achieve that '^:/base..:/tip' cannot?
'^/:base..:/tip' is not a range - everything after the first '/:' is
treated as a regular expression to search for.
Thanks
Phillip
^ permalink raw reply
* [PATCH v3 8/8] commit-reach: move min_generation check into paint_queue_get()
From: Kristofer Karlsson via GitGitGadget @ 2026-06-26 13:08 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Elijah Newren, Kristofer Karlsson,
Kristofer Karlsson
In-Reply-To: <pull.2149.v3.git.1782479286.gitgitgadget@gmail.com>
From: Kristofer Karlsson <krka@spotify.com>
Consolidate the min_generation termination condition into
paint_queue_get(), alongside the existing stale-entry and
side-exhaustion checks.
Move last_gen into struct paint_state so that
commit_graph_generation() is called exactly once per dequeued commit
and the result is shared across all termination checks and the
monotonicity BUG assertion. The loop body in paint_down_to_common()
reads state.last_gen instead of recomputing the generation number.
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
.../technical/paint-down-to-common.adoc | 9 ++++++
commit-reach.c | 31 +++++++++++--------
2 files changed, 27 insertions(+), 13 deletions(-)
diff --git a/Documentation/technical/paint-down-to-common.adoc b/Documentation/technical/paint-down-to-common.adoc
index 983dfcf233..eef249f4a4 100644
--- a/Documentation/technical/paint-down-to-common.adoc
+++ b/Documentation/technical/paint-down-to-common.adoc
@@ -97,6 +97,8 @@ ends when one of the following conditions holds:
3. Side exhaustion: no pure PARENT1 or pure PARENT2 commits
remain in the queue, no pending merge-base candidates exist,
and the walk has entered the finite-generation region.
+ 4. Generation cutoff: the dequeued commit's generation is below
+ a caller-supplied `min_generation` threshold.
Stale entry condition
~~~~~~~~~~~~~~~~~~~~~
@@ -121,6 +123,13 @@ time and an exhausted side cannot reappear. In the INFINITY region,
commit-date ordering can violate this guarantee, so the check is
skipped.
+Generation cutoff
+~~~~~~~~~~~~~~~~~
+Some callers (notably `remove_redundant()`) supply a `min_generation`
+threshold -- the minimum generation of the input commits. No merge
+base can have a generation below this threshold, so the walk
+terminates as soon as it dequeues such a commit.
+
Related documentation
---------------------
diff --git a/commit-reach.c b/commit-reach.c
index 0248d6fedb..0cd785c31b 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -89,6 +89,8 @@ struct paint_state {
int p1_count;
int p2_count;
int pending_merge_bases;
+ timestamp_t min_generation;
+ timestamp_t last_gen;
};
static void paint_count_update(struct paint_state *state,
@@ -138,11 +140,23 @@ static void paint_queue_put(struct paint_state *state,
static struct commit *paint_queue_get(struct paint_state *state)
{
struct commit *commit = prio_queue_get(&state->queue);
+ timestamp_t generation;
if (!commit)
return NULL;
commit->object.flags &= ~ENQUEUED;
+ generation = commit_graph_generation(commit);
+
+ if (generation > state->last_gen)
+ BUG("bad generation skip %"PRItime" > %"PRItime" at %s",
+ generation, state->last_gen,
+ oid_to_hex(&commit->object.oid));
+ state->last_gen = generation;
+
+ /* generation cutoff */
+ if (generation < state->min_generation)
+ return NULL;
if (!state->pending_merge_bases) {
/* only stale entries remain */
@@ -151,7 +165,7 @@ static struct commit *paint_queue_get(struct paint_state *state)
/* one side is exhausted */
if ((!state->p1_count || !state->p2_count) &&
- commit_graph_generation(commit) < GENERATION_NUMBER_INFINITY)
+ generation < GENERATION_NUMBER_INFINITY)
return NULL;
}
@@ -177,9 +191,10 @@ static int paint_down_to_common(struct repository *r,
struct commit *commit;
int i;
int steps = 0;
- timestamp_t last_gen = GENERATION_NUMBER_INFINITY;
struct commit_list **tail = result;
+ state.min_generation = min_generation;
+ state.last_gen = GENERATION_NUMBER_INFINITY;
if (!min_generation && !corrected_commit_dates_enabled(r))
state.queue.compare = compare_commits_by_commit_date;
@@ -196,18 +211,8 @@ static int paint_down_to_common(struct repository *r,
while ((commit = paint_queue_get(&state))) {
struct commit_list *parents;
int flags;
- timestamp_t generation = commit_graph_generation(commit);
steps++;
- if (generation > last_gen)
- BUG("bad generation skip %"PRItime" > %"PRItime" at %s",
- generation, last_gen,
- oid_to_hex(&commit->object.oid));
- last_gen = generation;
-
- if (generation < min_generation)
- break;
-
flags = commit->object.flags & (PARENT1 | PARENT2 | STALE);
if (flags == (PARENT1 | PARENT2)) {
if (!(commit->object.flags & RESULT)) {
@@ -219,7 +224,7 @@ static int paint_down_to_common(struct repository *r,
* descendant of this one.
*/
if (!(mb_flags & MERGE_BASE_FIND_ALL) &&
- generation < GENERATION_NUMBER_INFINITY)
+ state.last_gen < GENERATION_NUMBER_INFINITY)
break;
}
/* Mark parents of a found merge stale */
--
gitgitgadget
^ permalink raw reply related
* [PATCH v3 7/8] commit-reach: terminate merge-base walk when one paint side is exhausted
From: Kristofer Karlsson via GitGitGadget @ 2026-06-26 13:08 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Elijah Newren, Kristofer Karlsson,
Kristofer Karlsson
In-Reply-To: <pull.2149.v3.git.1782479286.gitgitgadget@gmail.com>
From: Kristofer Karlsson <krka@spotify.com>
Add an early termination check to paint_down_to_common() using the
per-side counters introduced earlier. Once the walk enters the
finite-generation region, terminate early when one side's exclusive
count drops to zero -- no new merge-base can form without both paint
sides meeting.
The check also waits for pending_merge_bases to reach zero, ensuring
all merge-base candidates have been dequeued and recorded before
exiting.
The INFINITY gate ensures correctness: commits without a commit-graph
entry have GENERATION_NUMBER_INFINITY and are ordered by commit date,
which is not topologically reliable. The optimization only fires
once the walk enters the finite-generation region where ordering
guarantees hold.
Widen the existing generation-monotonicity BUG assertion to fire
unconditionally, not only when min_generation is set. The
side-exhaustion optimization depends on correct generation ordering,
so the assertion should always be active.
Step counts measured with trace2 on git.git with commit-graph:
merge-base --all v2.0.0 v2.55.0-rc1:
before: 72264 steps after: 44589 steps
merge-base --all v2.55.0-rc1 v2.55.0-rc1~5:
before: 110 steps after: 7 steps
Helped-by: Derrick Stolee <stolee@gmail.com>
Helped-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
.../technical/paint-down-to-common.adoc | 17 +++++++++++++++++
commit-reach.c | 19 +++++++++++++++----
t/t6600-test-reach.sh | 4 ++--
3 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/Documentation/technical/paint-down-to-common.adoc b/Documentation/technical/paint-down-to-common.adoc
index 0f4e1892a5..983dfcf233 100644
--- a/Documentation/technical/paint-down-to-common.adoc
+++ b/Documentation/technical/paint-down-to-common.adoc
@@ -94,6 +94,9 @@ ends when one of the following conditions holds:
1. The queue is empty.
2. The queue contains only stale entries.
+ 3. Side exhaustion: no pure PARENT1 or pure PARENT2 commits
+ remain in the queue, no pending merge-base candidates exist,
+ and the walk has entered the finite-generation region.
Stale entry condition
~~~~~~~~~~~~~~~~~~~~~
@@ -104,6 +107,20 @@ existing candidates by proving one is an ancestor of another, but
`remove_redundant()` handles that as a post-processing step, so it
is safe to exit early.
+Side-exhaustion condition
+~~~~~~~~~~~~~~~~~~~~~~~~~
+A new merge-base requires commits from both sides to meet. When one
+side's exclusive counter reaches zero and there are no pending
+merge-base candidates, no future traversal step can produce a new
+candidate.
+
+This optimization only activates in the finite-generation region
+where topological ordering holds. In that region, children are
+always visited before parents, so paint flags are final at visit
+time and an exhausted side cannot reappear. In the INFINITY region,
+commit-date ordering can violate this guarantee, so the check is
+skipped.
+
Related documentation
---------------------
diff --git a/commit-reach.c b/commit-reach.c
index ee0e0fdf6e..0248d6fedb 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -131,6 +131,10 @@ static void paint_queue_put(struct paint_state *state,
}
}
+/*
+ * Dequeue the next commit for the paint walk, or return NULL when
+ * no more merge bases can be discovered.
+ */
static struct commit *paint_queue_get(struct paint_state *state)
{
struct commit *commit = prio_queue_get(&state->queue);
@@ -140,9 +144,16 @@ static struct commit *paint_queue_get(struct paint_state *state)
commit->object.flags &= ~ENQUEUED;
- if (!state->p1_count && !state->p2_count &&
- !state->pending_merge_bases)
- return NULL;
+ if (!state->pending_merge_bases) {
+ /* only stale entries remain */
+ if (!state->p1_count && !state->p2_count)
+ return NULL;
+
+ /* one side is exhausted */
+ if ((!state->p1_count || !state->p2_count) &&
+ commit_graph_generation(commit) < GENERATION_NUMBER_INFINITY)
+ return NULL;
+ }
paint_count_update(state, commit->object.flags, -1);
return commit;
@@ -188,7 +199,7 @@ static int paint_down_to_common(struct repository *r,
timestamp_t generation = commit_graph_generation(commit);
steps++;
- if (min_generation && generation > last_gen)
+ if (generation > last_gen)
BUG("bad generation skip %"PRItime" > %"PRItime" at %s",
generation, last_gen,
oid_to_hex(&commit->object.oid));
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index 51f3d70492..6365007560 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -220,7 +220,7 @@ test_expect_success 'in_merge_bases_many:self' '
EOF
echo "in_merge_bases_many(A,X):1" >expect &&
test_all_modes in_merge_bases_many &&
- test_paint_down_steps 45 2 25 3
+ test_paint_down_steps 45 1 25 1
'
test_expect_success 'is_descendant_of:hit' '
@@ -337,7 +337,7 @@ test_expect_success 'merge-base --all commit-walk steps' '
>input &&
git rev-parse commit-9-1 >expect &&
run_all_modes git merge-base --all commit-9-9 commit-9-1 &&
- test_paint_down_steps 81 80 81 81
+ test_paint_down_steps 81 9 57 10
'
test_expect_success 'reduce_heads' '
--
gitgitgadget
^ permalink raw reply related
* [PATCH v3 6/8] commit-reach: remove unused nonstale_queue dedup wrappers
From: Kristofer Karlsson via GitGitGadget @ 2026-06-26 13:08 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Elijah Newren, Kristofer Karlsson,
Kristofer Karlsson
In-Reply-To: <pull.2149.v3.git.1782479286.gitgitgadget@gmail.com>
From: Kristofer Karlsson <krka@spotify.com>
nonstale_queue_put_dedup() and nonstale_queue_get_dedup() became
unused after the previous commit. The core nonstale_queue functions
remain in use by ahead_behind().
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
commit-reach.c | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/commit-reach.c b/commit-reach.c
index 0f29b143bd..ee0e0fdf6e 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -79,24 +79,6 @@ static void clear_nonstale_queue(struct nonstale_queue *queue)
queue->max_nonstale = NULL;
}
-static void nonstale_queue_put_dedup(struct nonstale_queue *queue,
- struct commit *c)
-{
- if (c->object.flags & ENQUEUED)
- return;
- c->object.flags |= ENQUEUED;
- nonstale_queue_put(queue, c);
-}
-
-static struct commit *nonstale_queue_get_dedup(struct nonstale_queue *queue)
-{
- struct commit *commit = nonstale_queue_get(queue);
-
- if (commit)
- commit->object.flags &= ~ENQUEUED;
- return commit;
-}
-
/*
* Priority queue with per-side commit counters for paint_down_to_common().
* Each non-stale queued commit occupies exactly one bucket: PARENT1-only,
--
gitgitgadget
^ permalink raw reply related
* [PATCH v3 5/8] commit-reach: introduce struct paint_state with per-side counters
From: Kristofer Karlsson via GitGitGadget @ 2026-06-26 13:08 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Elijah Newren, Kristofer Karlsson,
Kristofer Karlsson
In-Reply-To: <pull.2149.v3.git.1782479286.gitgitgadget@gmail.com>
From: Kristofer Karlsson <krka@spotify.com>
Add a paint_state struct for use by paint_down_to_common() that
wraps a prio_queue with per-side commit counters. Each non-stale
queued commit occupies exactly one counter bucket based on its
paint flags: PARENT1-only, PARENT2-only, or both sides (a pending
merge-base candidate).
The counters are maintained by paint_count_update() which adjusts
the appropriate bucket by a signed delta. An exhaustive switch on
the paint+stale bits documents all valid flag combinations in one
place.
Convert paint_down_to_common() to use paint_state. The loop now
drains the queue via paint_queue_get() which returns NULL when all
counters reach zero, replacing the old pointer-based termination
(max_nonstale). This is equivalent behavior -- both conditions
detect that no non-stale entries remain.
paint_queue_get() uses a "pop first" form: it dequeues a commit,
then checks the counters. This means the loop exits one iteration
earlier than the old code in some topologies (the popped stale
commit is never processed), so a few step counts drop by one.
The existing nonstale_queue is left in place for ahead_behind().
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
.../technical/paint-down-to-common.adoc | 9 +-
commit-reach.c | 94 ++++++++++++++++---
t/t6600-test-reach.sh | 4 +-
3 files changed, 85 insertions(+), 22 deletions(-)
diff --git a/Documentation/technical/paint-down-to-common.adoc b/Documentation/technical/paint-down-to-common.adoc
index c10d5d2887..0f4e1892a5 100644
--- a/Documentation/technical/paint-down-to-common.adoc
+++ b/Documentation/technical/paint-down-to-common.adoc
@@ -88,15 +88,12 @@ re-enqueued is bounded by the number of flag transitions.
Termination
-----------
-The walk uses a `nonstale_queue` wrapper around `prio_queue` that
-tracks `max_nonstale`: the lowest-priority non-stale commit enqueued
-so far. Once that commit is dequeued, every remaining entry is known
-to be STALE and the loop terminates. Specifically, the main loop
+The walk tracks the number of commits of each type in the queue
+(PARENT1-only, PARENT2-only, pending merge-base). The main loop
ends when one of the following conditions holds:
1. The queue is empty.
- 2. `max_nonstale` has been dequeued, meaning the queue only contains
- STALE entries.
+ 2. The queue contains only stale entries.
Stale entry condition
~~~~~~~~~~~~~~~~~~~~~
diff --git a/commit-reach.c b/commit-reach.c
index f6a438550b..0f29b143bd 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -97,6 +97,75 @@ static struct commit *nonstale_queue_get_dedup(struct nonstale_queue *queue)
return commit;
}
+/*
+ * Priority queue with per-side commit counters for paint_down_to_common().
+ * Each non-stale queued commit occupies exactly one bucket: PARENT1-only,
+ * PARENT2-only, or both (a pending merge-base candidate).
+ */
+struct paint_state {
+ struct prio_queue queue;
+ int p1_count;
+ int p2_count;
+ int pending_merge_bases;
+};
+
+static void paint_count_update(struct paint_state *state,
+ unsigned flags, int delta)
+{
+ switch (flags & (PARENT1 | PARENT2 | STALE)) {
+ case PARENT1:
+ state->p1_count += delta;
+ break;
+
+ case PARENT2:
+ state->p2_count += delta;
+ break;
+
+ case PARENT1 | PARENT2:
+ state->pending_merge_bases += delta;
+ break;
+
+ case PARENT1 | PARENT2 | STALE:
+ break;
+
+ default:
+ BUG("unexpected paint state");
+ }
+}
+
+static void paint_queue_put(struct paint_state *state,
+ struct commit *c, unsigned add_flags)
+{
+ unsigned old_flags = c->object.flags;
+ c->object.flags |= add_flags;
+
+ if (old_flags & ENQUEUED) {
+ paint_count_update(state, old_flags, -1);
+ paint_count_update(state, c->object.flags, 1);
+ } else {
+ c->object.flags |= ENQUEUED;
+ prio_queue_put(&state->queue, c);
+ paint_count_update(state, c->object.flags, 1);
+ }
+}
+
+static struct commit *paint_queue_get(struct paint_state *state)
+{
+ struct commit *commit = prio_queue_get(&state->queue);
+
+ if (!commit)
+ return NULL;
+
+ commit->object.flags &= ~ENQUEUED;
+
+ if (!state->p1_count && !state->p2_count &&
+ !state->pending_merge_bases)
+ return NULL;
+
+ paint_count_update(state, commit->object.flags, -1);
+ return commit;
+}
+
/*
* See Documentation/technical/paint-down-to-common.adoc
*
@@ -109,31 +178,29 @@ static int paint_down_to_common(struct repository *r,
enum merge_base_flags mb_flags,
struct commit_list **result)
{
- struct nonstale_queue queue = {
- { compare_commits_by_gen_then_commit_date }
+ struct paint_state state = {
+ .queue = { compare_commits_by_gen_then_commit_date }
};
+ struct commit *commit;
int i;
int steps = 0;
timestamp_t last_gen = GENERATION_NUMBER_INFINITY;
struct commit_list **tail = result;
if (!min_generation && !corrected_commit_dates_enabled(r))
- queue.pq.compare = compare_commits_by_commit_date;
+ state.queue.compare = compare_commits_by_commit_date;
one->object.flags |= PARENT1;
if (!n) {
commit_list_append(one, result);
return 0;
}
- nonstale_queue_put_dedup(&queue, one);
+ paint_queue_put(&state, one, 0);
- for (i = 0; i < n; i++) {
- twos[i]->object.flags |= PARENT2;
- nonstale_queue_put_dedup(&queue, twos[i]);
- }
+ for (i = 0; i < n; i++)
+ paint_queue_put(&state, twos[i], PARENT2);
- while (queue.max_nonstale) {
- struct commit *commit = nonstale_queue_get_dedup(&queue);
+ while ((commit = paint_queue_get(&state))) {
struct commit_list *parents;
int flags;
timestamp_t generation = commit_graph_generation(commit);
@@ -172,7 +239,7 @@ static int paint_down_to_common(struct repository *r,
if ((p->object.flags & flags) == flags)
continue;
if (repo_parse_commit(r, p)) {
- clear_nonstale_queue(&queue);
+ clear_prio_queue(&state.queue);
commit_list_free(*result);
*result = NULL;
/*
@@ -187,12 +254,11 @@ static int paint_down_to_common(struct repository *r,
return error(_("could not parse commit %s"),
oid_to_hex(&p->object.oid));
}
- p->object.flags |= flags;
- nonstale_queue_put_dedup(&queue, p);
+ paint_queue_put(&state, p, flags);
}
}
- clear_nonstale_queue(&queue);
+ clear_prio_queue(&state.queue);
trace2_data_intmax("paint_down_to_common", r,
"steps", steps);
commit_list_sort_by_date(result);
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index b3a31b80ac..51f3d70492 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -289,7 +289,7 @@ test_expect_success 'get_merge_bases_many:pending-stale' '
git rev-parse ps-B
} >expect &&
test_all_modes get_merge_bases_many &&
- test_paint_down_steps 6 6 6 6
+ test_paint_down_steps 5 5 5 5
'
test_expect_success 'get_merge_bases_many:infinity-both-sides' '
@@ -304,7 +304,7 @@ test_expect_success 'get_merge_bases_many:infinity-both-sides' '
git rev-parse pi-B
} >expect &&
test_all_modes get_merge_bases_many &&
- test_paint_down_steps 5 5 5 5
+ test_paint_down_steps 5 4 5 5
'
test_expect_success 'setup mixed finite/INFINITY topology' '
--
gitgitgadget
^ permalink raw reply related
* [PATCH v3 4/8] commit-reach: add trace2 instrumentation to paint_down_to_common()
From: Kristofer Karlsson via GitGitGadget @ 2026-06-26 13:08 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Elijah Newren, Kristofer Karlsson,
Kristofer Karlsson
In-Reply-To: <pull.2149.v3.git.1782479286.gitgitgadget@gmail.com>
From: Kristofer Karlsson <krka@spotify.com>
Add a step counter and trace2_data_intmax() call so that the number
of commits visited during the paint walk is observable via
GIT_TRACE2_EVENT. This provides a way to measure the impact of
future optimizations without relying on wall-clock benchmarks alone.
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
commit-reach.c | 5 ++++
t/t6600-test-reach.sh | 53 ++++++++++++++++++++++++++++++-------------
2 files changed, 42 insertions(+), 16 deletions(-)
diff --git a/commit-reach.c b/commit-reach.c
index a9483759e0..f6a438550b 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -11,6 +11,7 @@
#include "tag.h"
#include "commit-reach.h"
#include "ewah/ewok.h"
+#include "trace2.h"
/* Remember to update object flag allocation in object.h */
#define PARENT1 (1u<<16)
@@ -112,6 +113,7 @@ static int paint_down_to_common(struct repository *r,
{ compare_commits_by_gen_then_commit_date }
};
int i;
+ int steps = 0;
timestamp_t last_gen = GENERATION_NUMBER_INFINITY;
struct commit_list **tail = result;
@@ -135,6 +137,7 @@ static int paint_down_to_common(struct repository *r,
struct commit_list *parents;
int flags;
timestamp_t generation = commit_graph_generation(commit);
+ steps++;
if (min_generation && generation > last_gen)
BUG("bad generation skip %"PRItime" > %"PRItime" at %s",
@@ -190,6 +193,8 @@ static int paint_down_to_common(struct repository *r,
}
clear_nonstale_queue(&queue);
+ trace2_data_intmax("paint_down_to_common", r,
+ "steps", steps);
commit_list_sort_by_date(result);
return 0;
}
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index 4b771b4c58..b3a31b80ac 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -118,24 +118,34 @@ test_expect_success 'setup' '
'
run_all_modes () {
- test_when_finished rm -rf .git/objects/info/commit-graph &&
- "$@" <input >actual &&
- test_cmp expect actual &&
- cp commit-graph-full .git/objects/info/commit-graph &&
- "$@" <input >actual &&
- test_cmp expect actual &&
- cp commit-graph-half .git/objects/info/commit-graph &&
- "$@" <input >actual &&
- test_cmp expect actual &&
- cp commit-graph-no-gdat .git/objects/info/commit-graph &&
- "$@" <input >actual &&
- test_cmp expect actual
+ graph=.git/objects/info/commit-graph &&
+ test_when_finished rm -rf "$graph" "${graph}s" &&
+ rm -f trace-mode-*.txt &&
+
+ for mode in none full half no-gdat
+ do
+ rm -rf "$graph" "${graph}s" &&
+ cp "commit-graph-${mode}" "$graph" 2>/dev/null ||
+ true &&
+ GIT_TRACE2_EVENT="$(pwd)/trace-mode-${mode}.txt" \
+ "$@" <input >actual &&
+ test_cmp expect actual || return 1
+ done
}
test_all_modes () {
run_all_modes test-tool reach "$@"
}
+test_paint_down_steps () {
+ for mode in none full half no-gdat
+ do
+ test_trace2_data paint_down_to_common steps "$1" \
+ <"trace-mode-${mode}.txt" || return 1
+ shift
+ done
+}
+
test_expect_success 'ref_newer:miss' '
cat >input <<-\EOF &&
A:commit-5-7
@@ -209,7 +219,8 @@ test_expect_success 'in_merge_bases_many:self' '
X:commit-6-8
EOF
echo "in_merge_bases_many(A,X):1" >expect &&
- test_all_modes in_merge_bases_many
+ test_all_modes in_merge_bases_many &&
+ test_paint_down_steps 45 2 25 3
'
test_expect_success 'is_descendant_of:hit' '
@@ -277,7 +288,8 @@ test_expect_success 'get_merge_bases_many:pending-stale' '
echo "get_merge_bases_many(A,X):" &&
git rev-parse ps-B
} >expect &&
- test_all_modes get_merge_bases_many
+ test_all_modes get_merge_bases_many &&
+ test_paint_down_steps 6 6 6 6
'
test_expect_success 'get_merge_bases_many:infinity-both-sides' '
@@ -291,7 +303,8 @@ test_expect_success 'get_merge_bases_many:infinity-both-sides' '
echo "get_merge_bases_many(A,X):" &&
git rev-parse pi-B
} >expect &&
- test_all_modes get_merge_bases_many
+ test_all_modes get_merge_bases_many &&
+ test_paint_down_steps 5 5 5 5
'
test_expect_success 'setup mixed finite/INFINITY topology' '
@@ -316,7 +329,15 @@ test_expect_success 'get_merge_bases_many:mixed-finite-infinity' '
echo "get_merge_bases_many(A,X):" &&
git rev-parse ps-X
} >expect &&
- test_all_modes get_merge_bases_many
+ test_all_modes get_merge_bases_many &&
+ test_paint_down_steps 3 3 3 3
+'
+
+test_expect_success 'merge-base --all commit-walk steps' '
+ >input &&
+ git rev-parse commit-9-1 >expect &&
+ run_all_modes git merge-base --all commit-9-9 commit-9-1 &&
+ test_paint_down_steps 81 80 81 81
'
test_expect_success 'reduce_heads' '
--
gitgitgadget
^ permalink raw reply related
* [PATCH v3 3/8] t6099, t6600: add side-exhaustion regression tests
From: Kristofer Karlsson via GitGitGadget @ 2026-06-26 13:08 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Elijah Newren, Kristofer Karlsson,
Kristofer Karlsson
In-Reply-To: <pull.2149.v3.git.1782479286.gitgitgadget@gmail.com>
From: Kristofer Karlsson <krka@spotify.com>
Add t6099 to test the case where multiple merge-base candidates exist
and one is an ancestor of another. This exercises the side-exhaustion
optimization in paint_down_to_common together with the
remove_redundant safety net in get_merge_bases_many_0.
Add a mixed finite/INFINITY test to t6600 where one tip is outside
the commit-graph (INFINITY generation) and the other is inside.
This exercises the region transition: the walk starts in the
INFINITY region where side-exhaustion is disabled, then crosses
into the finite region where it can fire.
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
t/meson.build | 1 +
t/t6099-merge-base-side-exhaustion.sh | 82 +++++++++++++++++++++++++++
t/t6600-test-reach.sh | 25 ++++++++
3 files changed, 108 insertions(+)
create mode 100755 t/t6099-merge-base-side-exhaustion.sh
diff --git a/t/meson.build b/t/meson.build
index 3219264fe7..ee6ebdffb9 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -786,6 +786,7 @@ integration_tests = [
't6041-bisect-submodule.sh',
't6050-replace.sh',
't6060-merge-index.sh',
+ 't6099-merge-base-side-exhaustion.sh',
't6100-rev-list-in-order.sh',
't6101-rev-parse-parents.sh',
't6102-rev-list-unexpected-objects.sh',
diff --git a/t/t6099-merge-base-side-exhaustion.sh b/t/t6099-merge-base-side-exhaustion.sh
new file mode 100755
index 0000000000..4f1e0d50ef
--- /dev/null
+++ b/t/t6099-merge-base-side-exhaustion.sh
@@ -0,0 +1,82 @@
+#!/bin/sh
+
+test_description='merge-base with ancestor among merge-base candidates
+
+Test that merge-base --all correctly handles cases where
+multiple merge-base candidates exist and one is an ancestor
+of another. The side-exhaustion optimization in
+paint_down_to_common may exit before STALE propagation
+removes the ancestor, but remove_redundant catches it.
+
+Graph shape (parents are below children):
+
+ A ----------- X
+ |\ /|
+ | B---------/ |
+ | | |
+ e2 \ f2
+ | | |
+ e1 d1 f1
+ \ | /
+ \ | /
+ \| /
+ C
+
+A and X are the two tips.
+B and C are both reachable from A and X.
+B reaches C through d1.
+Only B should appear in merge-base --all output.
+'
+
+GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
+export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
+
+TEST_PASSES_SANITIZE_LEAK=true
+. ./test-lib.sh
+
+test_expect_success 'setup ancestor merge-base candidate' '
+ test_commit C &&
+
+ git checkout -b d-chain HEAD &&
+ test_commit d1 &&
+ test_commit B &&
+
+ git checkout -b e-path C &&
+ test_commit e1 &&
+ test_commit e2 &&
+
+ git checkout -b f-path C &&
+ test_commit f1 &&
+ test_commit f2 &&
+
+ git checkout -b branch-A e-path &&
+ test_merge A B &&
+
+ git checkout -b branch-X f-path &&
+ test_merge X B &&
+
+ git commit-graph write --reachable
+'
+
+test_expect_success 'merge-base --all excludes ancestor candidate' '
+ git rev-parse B >expected &&
+ git merge-base --all A X >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'merge-base (single) finds shallowest' '
+ git rev-parse B >expected &&
+ git merge-base A X >actual &&
+ test_cmp expected actual
+'
+
+# Without commit-graph: generation numbers are INFINITY,
+# side-exhaustion optimization does not fire.
+test_expect_success 'merge-base --all without commit-graph' '
+ rm -f .git/objects/info/commit-graph &&
+ git rev-parse B >expected &&
+ git merge-base --all A X >actual &&
+ test_cmp expected actual
+'
+
+test_done
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index c2e091aad1..4b771b4c58 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -294,6 +294,31 @@ test_expect_success 'get_merge_bases_many:infinity-both-sides' '
test_all_modes get_merge_bases_many
'
+test_expect_success 'setup mixed finite/INFINITY topology' '
+ # Create a commit outside all saved commit-graph files so it always
+ # has INFINITY generation, while its parent (ps-X) is in the graph
+ # with a finite generation. Use the ps-* orphan topology so we do
+ # not pollute the grid-based rev-list tests.
+ git checkout ps-X &&
+ test_env GIT_TEST_COMMIT_GRAPH= test_commit pm-INF
+'
+
+test_expect_success 'get_merge_bases_many:mixed-finite-infinity' '
+ # One tip (pm-INF) is outside the commit-graph with INFINITY
+ # generation; the other (ps-B) is in the graph with finite
+ # generation. The walk starts in the INFINITY region and crosses
+ # into the finite region where side-exhaustion can fire.
+ cat >input <<-\EOF &&
+ A:pm-INF
+ X:ps-B
+ EOF
+ {
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse ps-X
+ } >expect &&
+ test_all_modes get_merge_bases_many
+'
+
test_expect_success 'reduce_heads' '
cat >input <<-\EOF &&
X:commit-1-10
--
gitgitgadget
^ permalink raw reply related
* [PATCH v3 2/8] t6600: add test cases for side-exhaustion edge cases
From: Elijah Newren via GitGitGadget @ 2026-06-26 13:07 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Elijah Newren, Kristofer Karlsson, Elijah Newren
In-Reply-To: <pull.2149.v3.git.1782479286.gitgitgadget@gmail.com>
From: Elijah Newren <newren@gmail.com>
Add test cases to t6600-test-reach.sh that exercise edge cases in the
side-exhaustion optimization for paint_down_to_common():
- in_merge_bases_many:self: commit is both A and one of the X inputs
- get_merge_bases_many:duplicate-twos: duplicate entries in X list
- get_merge_bases_many:pending-stale: STALE transition on an
already-painted commit (ps-* diamond topology)
- get_merge_bases_many:infinity-both-sides: both tips outside the
commit-graph with non-monotonic dates (pi-* topology)
Signed-off-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
t/t6600-test-reach.sh | 111 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 111 insertions(+)
diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh
index b5b314e570..c2e091aad1 100755
--- a/t/t6600-test-reach.sh
+++ b/t/t6600-test-reach.sh
@@ -49,6 +49,62 @@ test_expect_success 'setup' '
git tag -a -m "$x-$i" tag-$x-$i commit-$x-$i || return 1
done
done &&
+
+ # Build a small side topology to exercise the (PARENT1|PARENT2) ->
+ # (PARENT1|PARENT2|STALE) transition in paint_down_to_common(); the
+ # 10x10 grid above does not exercise it because no merge-base candidate
+ # there is a descendant of another, so STALE never reaches a
+ # still-pending candidate.
+ #
+ # ps-X
+ # /|\
+ # / | \
+ # ps-Z ps-B ps-W
+ # | / \ |
+ # | / \ |
+ # |/ \|
+ # ps-T1 ps-T2
+ #
+ # where ps-T1=merge(ps-Z,ps-B), ps-T2=merge(ps-W,ps-B), so
+ # merge-base(ps-T1,ps-T2) = ps-B. During the walk, ps-X transitions
+ # to (PARENT1|PARENT2) via ps-Z and ps-W before ps-B is dequeued;
+ # then the STALE-walk from ps-B transitions ps-X to
+ # (PARENT1|PARENT2|STALE).
+ git checkout --orphan ps-orphan &&
+ test_commit ps-X &&
+ git checkout -b ps-B-br ps-X && test_commit ps-B &&
+ git checkout -b ps-Z-br ps-X && test_commit ps-Z &&
+ git checkout -b ps-W-br ps-X && test_commit ps-W &&
+ git checkout -b ps-T1 ps-Z &&
+ git merge --no-ff -m ps-T1 ps-B &&
+ git checkout -b ps-T2 ps-W &&
+ git merge --no-ff -m ps-T2 ps-B &&
+
+ # Build a side topology that lives entirely outside the half
+ # commit-graph and has non-monotonic commit dates, to exercise the
+ # INFINITY-gate in paint_down_to_common. With both tips outside
+ # the graph, generation is INFINITY and the queue falls back to
+ # commit-date order, which here is non-monotonic.
+ #
+ # pi-X (date 500, PARENT1 tip) --> pi-P, pi-D
+ # pi-D (date 480) --> pi-C
+ # pi-C (date 200) --> pi-B
+ # pi-B (date 100, PARENT2 tip) --> pi-P
+ # pi-P (date 450, root)
+ #
+ # merge-base(pi-X, pi-B) = pi-B (it is an ancestor of pi-X and is
+ # itself one of the queried tips).
+ git checkout --orphan pi-orphan &&
+ test_commit --date "@450 +0000" pi-P &&
+ test_commit --date "@100 +0000" pi-B &&
+ test_commit --date "@200 +0000" pi-C &&
+ test_commit --date "@480 +0000" pi-D &&
+ GIT_AUTHOR_DATE="@500 +0000" GIT_COMMITTER_DATE="@500 +0000" \
+ git commit-tree -p pi-D -p pi-P -m pi-X pi-D^{tree} >pi-X-oid &&
+ pi_x="$(cat pi-X-oid)" &&
+ git branch -f pi-X-br "$pi_x" &&
+ git tag pi-X "$pi_x" &&
+
git commit-graph write --reachable &&
mv .git/objects/info/commit-graph commit-graph-full &&
chmod u+w commit-graph-full &&
@@ -146,6 +202,16 @@ test_expect_success 'in_merge_bases_many:miss-heuristic' '
test_all_modes in_merge_bases_many
'
+test_expect_success 'in_merge_bases_many:self' '
+ cat >input <<-\EOF &&
+ A:commit-6-8
+ X:commit-5-9
+ X:commit-6-8
+ EOF
+ echo "in_merge_bases_many(A,X):1" >expect &&
+ test_all_modes in_merge_bases_many
+'
+
test_expect_success 'is_descendant_of:hit' '
cat >input <<-\EOF &&
A:commit-5-7
@@ -183,6 +249,51 @@ test_expect_success 'get_merge_bases_many' '
test_all_modes get_merge_bases_many
'
+test_expect_success 'get_merge_bases_many:duplicate-twos' '
+ cat >input <<-\EOF &&
+ A:commit-5-7
+ X:commit-4-8
+ X:commit-4-8
+ X:commit-6-6
+ X:commit-6-6
+ X:commit-8-3
+ EOF
+ {
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse commit-5-6 \
+ commit-4-7 | sort
+ } >expect &&
+ test_all_modes get_merge_bases_many
+'
+
+test_expect_success 'get_merge_bases_many:pending-stale' '
+ # Exercises the (PARENT1|PARENT2) -> (...|STALE) transition path in
+ # paint_down_to_common(). See the topology comment in the setup test.
+ cat >input <<-\EOF &&
+ A:ps-T1
+ X:ps-T2
+ EOF
+ {
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse ps-B
+ } >expect &&
+ test_all_modes get_merge_bases_many
+'
+
+test_expect_success 'get_merge_bases_many:infinity-both-sides' '
+ # Exercises the push-time INFINITY-gate in paint_down_to_common(). See
+ # the pi-* topology comment in the setup test.
+ cat >input <<-\EOF &&
+ A:pi-X
+ X:pi-B
+ EOF
+ {
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse pi-B
+ } >expect &&
+ test_all_modes get_merge_bases_many
+'
+
test_expect_success 'reduce_heads' '
cat >input <<-\EOF &&
X:commit-1-10
--
gitgitgadget
^ permalink raw reply related
* [PATCH v3 1/8] Documentation/technical: add paint-down-to-common doc
From: Kristofer Karlsson via GitGitGadget @ 2026-06-26 13:07 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Elijah Newren, Kristofer Karlsson,
Kristofer Karlsson
In-Reply-To: <pull.2149.v3.git.1782479286.gitgitgadget@gmail.com>
From: Kristofer Karlsson <krka@spotify.com>
Add a technical document describing the paint_down_to_common()
algorithm used for merge-base computation, covering the paint
walk, generation number regions, and termination conditions.
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
Documentation/Makefile | 1 +
Documentation/technical/meson.build | 1 +
.../technical/paint-down-to-common.adoc | 114 ++++++++++++++++++
commit-reach.c | 6 +-
4 files changed, 121 insertions(+), 1 deletion(-)
create mode 100644 Documentation/technical/paint-down-to-common.adoc
diff --git a/Documentation/Makefile b/Documentation/Makefile
index 2699f0b24a..f8dea4b395 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -129,6 +129,7 @@ TECH_DOCS += technical/long-running-process-protocol
TECH_DOCS += technical/multi-pack-index
TECH_DOCS += technical/packfile-uri
TECH_DOCS += technical/pack-heuristics
+TECH_DOCS += technical/paint-down-to-common
TECH_DOCS += technical/parallel-checkout
TECH_DOCS += technical/partial-clone
TECH_DOCS += technical/platform-support
diff --git a/Documentation/technical/meson.build b/Documentation/technical/meson.build
index ec07088c57..9ce11d5e48 100644
--- a/Documentation/technical/meson.build
+++ b/Documentation/technical/meson.build
@@ -18,6 +18,7 @@ articles = [
'multi-pack-index.adoc',
'packfile-uri.adoc',
'pack-heuristics.adoc',
+ 'paint-down-to-common.adoc',
'parallel-checkout.adoc',
'partial-clone.adoc',
'platform-support.adoc',
diff --git a/Documentation/technical/paint-down-to-common.adoc b/Documentation/technical/paint-down-to-common.adoc
new file mode 100644
index 0000000000..c10d5d2887
--- /dev/null
+++ b/Documentation/technical/paint-down-to-common.adoc
@@ -0,0 +1,114 @@
+Merge-Base Computation and paint_down_to_common()
+==================================================
+
+The function `paint_down_to_common()` in `commit-reach.c` computes merge
+bases by walking the commit graph backwards from two sets of tips and
+finding where their ancestry meets.
+
+Use cases
+---------
+
+Computing merge bases is used in two different ways:
+
+ 1. *Finding all merge bases* (`merge-base --all`, `merge-tree`,
+ `merge`, `rebase`). A merge base is a common ancestor that is
+ not itself an ancestor of another common ancestor.
+
+ 2. *Ancestry checks* (`in_merge_bases`, used by `merge-base
+ --is-ancestor`, `branch -d`, `fetch`). These ask: "is commit A
+ an ancestor of commit B?" If a common ancestor equals one of the
+ inputs, that input is necessarily the only merge base -- no other
+ common ancestor can be both as recent and not an ancestor of it.
+
+Both use cases share the same algorithm and implementation.
+
+Algorithm
+---------
+
+Given a commit `one` and a set of commits `twos[]`, the walk paints
+commits with two colors:
+
+ - PARENT1: reachable from `one`
+ - PARENT2: reachable from any commit in `twos[]`
+
+The walk uses a priority queue ordered by generation number (falling
+back to commit date when generation numbers are unavailable). Each
+step dequeues the highest-priority commit (this is when we say a
+commit is "visited") and propagates its paint flags to its parents,
+enqueuing them if they gained new flags. When a commit receives
+both PARENT1 and PARENT2, it is a merge-base candidate. A candidate
+gains the STALE flag so its ancestors propagate staleness -- any
+deeper common ancestor is necessarily redundant.
+
+INFINITY and finite generation regions
+--------------------------------------
+
+The commit-graph stores a generation number for each commit. Commits
+not in the commit-graph have generation `GENERATION_NUMBER_INFINITY`. The
+graph is closed under reachability: if a commit is in the graph, all
+its ancestors are too. This partitions the commit graph into two regions:
+
+....
+ +---------------------------------------+
+ | INFINITY region |
+ | generation = INFINITY |
+ | queue order: heuristic (commit date) |
+ +---------------------------------------+
+ |
+ v
+ +---------------------------------------+
+ | Finite region |
+ | generation = finite |
+ | queue order: topological |
+ +---------------------------------------+
+....
+
+When the commit-graph is enabled, the INFINITY region is typically
+very small -- it only contains commits added since the last
+commit-graph refresh.
+
+All reachable INFINITY-generation commits are visited before any
+finite-generation commit, because INFINITY is larger than any finite
+value. Once the walk crosses into the finite region, it stays there.
+
+In the finite region, generation ordering guarantees topological
+traversal: children are always visited before their parents. This
+means that paint on already-visited commits is final -- no future
+traversal step can add paint to them.
+
+In the INFINITY region, commit-date ordering can violate this: a
+parent with a later date can be visited before a child with an earlier
+date. Paint flags are therefore NOT final at visit time, and a
+commit visited with only one side's paint may later gain the other.
+
+Paint flags are only added, never removed. Since each flag can be set
+at most once per commit, the number of times a commit can be
+re-enqueued is bounded by the number of flag transitions.
+
+Termination
+-----------
+
+The walk uses a `nonstale_queue` wrapper around `prio_queue` that
+tracks `max_nonstale`: the lowest-priority non-stale commit enqueued
+so far. Once that commit is dequeued, every remaining entry is known
+to be STALE and the loop terminates. Specifically, the main loop
+ends when one of the following conditions holds:
+
+ 1. The queue is empty.
+ 2. `max_nonstale` has been dequeued, meaning the queue only contains
+ STALE entries.
+
+Stale entry condition
+~~~~~~~~~~~~~~~~~~~~~
+Once all queued entries are stale, no new merge-base candidates can
+be discovered -- that requires at least one non-stale commit from
+each side meeting. Continuing the walk could still invalidate
+existing candidates by proving one is an ancestor of another, but
+`remove_redundant()` handles that as a post-processing step, so it
+is safe to exit early.
+
+Related documentation
+---------------------
+
+ - `Documentation/technical/commit-graph.adoc` -- generation numbers
+ and the reachability closure property.
diff --git a/commit-reach.c b/commit-reach.c
index 5df471a313..a9483759e0 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -96,7 +96,11 @@ static struct commit *nonstale_queue_get_dedup(struct nonstale_queue *queue)
return commit;
}
-/* all input commits in one and twos[] must have been parsed! */
+/*
+ * See Documentation/technical/paint-down-to-common.adoc
+ *
+ * All input commits in one and twos[] must have been parsed!
+ */
static int paint_down_to_common(struct repository *r,
struct commit *one, int n,
struct commit **twos,
--
gitgitgadget
^ permalink raw reply related
* [PATCH v3 0/8] commit-reach: terminate merge-base walk when one side is exhausted
From: Kristofer Karlsson via GitGitGadget @ 2026-06-26 13:07 UTC (permalink / raw)
To: git; +Cc: Derrick Stolee, Elijah Newren, Kristofer Karlsson
In-Reply-To: <pull.2149.v2.git.1782303254.gitgitgadget@gmail.com>
commit-reach: terminate merge-base walk when one paint side is exhausted
Optimize paint_down_to_common() for merge-base queries that hit large
one-sided histories.
When the walk from one side reaches a commit with a very low generation
number that the other side never paints, the walk is forced to drain most of
the graph. A common trigger is a repository import that grafts a separate
history with its own root, but any merge that introduces a low-generation
commit never painted by the other side has the same effect.
A new merge-base candidate can only be discovered when exclusive PARENT1 and
PARENT2 paint meet. This series teaches paint_down_to_common() to stop as
soon as one side has no exclusive commits left in the queue; once one side
is exhausted, no further candidates can appear.
origin/HEAD o o PR HEAD
| |
(import) o :
/ \ /
| o merge-base
| |
: : (~2.5M commits)
| |
import root main root
In the RFC thread [1], Derrick Stolee provided a criss-cross counterexample
that sharpened the halt condition, and Elijah Newren independently
discovered the same optimization and shared an implementation in PR #2150
[2]. Patches 2-3 incorporate test cases from Elijah's branch.
This series implements the optimization only after the walk enters the
finite-generation region, where generation ordering guarantees that paint on
visited commits is final.
Patch layout:
1/8 Documentation/technical: add paint-down-to-common doc 2/8 t6600: add
test cases for side-exhaustion edge cases 3/8 t6099, t6600: add
side-exhaustion regression tests 4/8 commit-reach: add trace2
instrumentation to paint_down_to_common() 5/8 commit-reach: introduce struct
paint_state with per-side counters 6/8 commit-reach: remove unused
nonstale_queue dedup wrappers 7/8 commit-reach: terminate merge-base walk
when one paint side is exhausted 8/8 commit-reach: move min_generation check
into paint_queue_get()
Benchmarks
Step counts are deterministic (measured via trace2_data_intmax added in
patch 4). Wall-clock times are best-of-11 runs.
2.6M-commit monorepo with commit-graph:
steps wall-clock
merge-base --all (across import) 2143438 -> 3 3.67s -> 5ms
merge-base --all (1000 apart) 2692915 -> 1035 4.41s -> 7ms
merge-base --all (5000 apart) 2692915 -> 6401 4.45s -> 13ms
merge-base --all (HEAD vs import) 2698872 -> 45960 4.50s -> 79ms
merge-tree (across import) 2143438 -> 3 4.42s -> 11ms
git.git (88k commits, commit-graph):
steps wall-clock
merge-base --all v2.0.0 v2.55.0-rc1 72264 -> 44589 110ms -> 68ms
merge-base --all HEAD HEAD~1000 9891 -> 3828 18ms -> 10ms
merge-base --all HEAD HEAD~10000 72303 -> 41487 101ms -> 50ms
Changes since v2:
* New patch 8/8: moved the min_generation termination check and the
last_gen monotonicity assertion into paint_queue_get(), consolidating
halt conditions. commit_graph_generation() is now called once per
dequeued commit and shared across all checks.
* Widened the generation-monotonicity BUG assertion to fire
unconditionally, not only when min_generation is set. The side-exhaustion
optimization depends on correct generation ordering, so the assertion
should always be active. This is a behavior change: the BUG() now fires
for any generation ordering violation, regardless of the caller.
* Moved all halt conditions inside paint_queue_get() with the "pop first"
form: pop, check, then decrement counters. This keeps the optimization
commit's diff minimal (just inserting the new checks between pop and
decrement).
* Shortened the doc comment on paint_queue_get() to describe what it does
rather than how. Inline comments on each return NULL explain the specific
halt condition.
* Replaced the manual commit-graph setup in the step-count test with
run_all_modes, which now sets GIT_TRACE2_EVENT per mode and produces
trace-mode-{none,full,half,no-gdat}.txt files.
* Added a test_paint_down_steps helper for concise 4-mode step assertions
with diagnostic output on mismatch (prints "expected X, got Y" instead of
a silent grep failure).
* Added step-count assertions to the single-walk edge-case tests:
in_merge_bases_many:self, pending-stale, infinity-both-sides,
mixed-finite-infinity.
* Included step counts alongside wall-clock times in the benchmark tables.
Changes since v1:
* Reordered patches: documentation first (describing the existing
algorithm), tests before code changes, so they demonstrate passing with
old logic first.
* Dropped the ahead_behind decoupling patch. paint_state is now a NEW
struct alongside nonstale_queue instead of replacing it. ahead_behind()
is completely untouched.
* Removed nonstale_queue_put_dedup() and nonstale_queue_get_dedup() (dead
code after the conversion) in a separate commit.
* Renamed: struct paint_queue -> paint_state, field pq -> queue,
paint_count_add/remove -> paint_count_update (single function with signed
delta parameter).
* Split the old paint_count_transition (which handled both old and new
flags in one call) into separate remove/add calls with a signed delta.
This eliminates the need for the case 0 handler (which tracked "not in
the queue") and allows an exhaustive switch on (PARENT1 | PARENT2 |
STALE) that documents all valid flag combinations, with BUG() in default.
* Added trace2_data_intmax() instrumentation to report the number of
commits visited per paint walk (separate commit), with deterministic
step-count assertions in t6600.
* Expanded switch statements to multi-line format per .clang-format.
* Used !count style throughout instead of count == 0.
* Updated technical documentation alongside code changes.
[1]
https://lore.kernel.org/git/CAL71e4Ps-2_0+uuZu43N9pFnXBemoAohPs_eyRJf8taXHJPAXQ@mail.gmail.com/T/#u
[2] https://github.com/gitgitgadget/git/pull/2150
Elijah Newren (1):
t6600: add test cases for side-exhaustion edge cases
Kristofer Karlsson (7):
Documentation/technical: add paint-down-to-common doc
t6099, t6600: add side-exhaustion regression tests
commit-reach: add trace2 instrumentation to paint_down_to_common()
commit-reach: introduce struct paint_state with per-side counters
commit-reach: remove unused nonstale_queue dedup wrappers
commit-reach: terminate merge-base walk when one paint side is
exhausted
commit-reach: move min_generation check into paint_queue_get()
Documentation/Makefile | 1 +
Documentation/technical/meson.build | 1 +
.../technical/paint-down-to-common.adoc | 137 +++++++++++++
commit-reach.c | 147 ++++++++++----
t/meson.build | 1 +
t/t6099-merge-base-side-exhaustion.sh | 82 ++++++++
t/t6600-test-reach.sh | 181 ++++++++++++++++--
7 files changed, 501 insertions(+), 49 deletions(-)
create mode 100644 Documentation/technical/paint-down-to-common.adoc
create mode 100755 t/t6099-merge-base-side-exhaustion.sh
base-commit: 6c3d7b73556db708feb3b16232fab1efc4353428
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2149%2Fspkrka%2Fside-exhaust-pr-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2149/spkrka/side-exhaust-pr-v3
Pull-Request: https://github.com/gitgitgadget/git/pull/2149
Range-diff vs v2:
1: 19ed743bd1 = 1: 2593866bce Documentation/technical: add paint-down-to-common doc
2: 6151b8e0a3 = 2: 9efc084850 t6600: add test cases for side-exhaustion edge cases
3: 90f09ecb5c = 3: 14b0d86b93 t6099, t6600: add side-exhaustion regression tests
4: 6ade4df2ed ! 4: 2592264cda commit-reach: add trace2 instrumentation to paint_down_to_common()
@@ Commit message
Add a step counter and trace2_data_intmax() call so that the number
of commits visited during the paint walk is observable via
- GIT_TRACE2_PERF. This provides a way to measure the impact of
+ GIT_TRACE2_EVENT. This provides a way to measure the impact of
future optimizations without relying on wall-clock benchmarks alone.
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
@@ commit-reach.c: static int paint_down_to_common(struct repository *r,
}
## t/t6600-test-reach.sh ##
-@@ t/t6600-test-reach.sh: test_expect_success 'get_merge_bases_many:mixed-finite-infinity' '
- test_all_modes get_merge_bases_many
+@@ t/t6600-test-reach.sh: test_expect_success 'setup' '
'
-+test_expect_success 'merge-base --all commit-walk steps' '
-+ test_when_finished rm -rf .git/objects/info/commit-graph \
-+ .git/objects/info/commit-graphs &&
-+ rm -rf .git/objects/info/commit-graph \
-+ .git/objects/info/commit-graphs &&
-+
-+ GIT_TRACE2_EVENT="$(pwd)/trace-none.txt" \
-+ git merge-base --all commit-9-9 commit-9-1 >actual &&
-+ test_trace2_data paint_down_to_common steps 81 <trace-none.txt &&
+ run_all_modes () {
+- test_when_finished rm -rf .git/objects/info/commit-graph &&
+- "$@" <input >actual &&
+- test_cmp expect actual &&
+- cp commit-graph-full .git/objects/info/commit-graph &&
+- "$@" <input >actual &&
+- test_cmp expect actual &&
+- cp commit-graph-half .git/objects/info/commit-graph &&
+- "$@" <input >actual &&
+- test_cmp expect actual &&
+- cp commit-graph-no-gdat .git/objects/info/commit-graph &&
+- "$@" <input >actual &&
+- test_cmp expect actual
++ graph=.git/objects/info/commit-graph &&
++ test_when_finished rm -rf "$graph" "${graph}s" &&
++ rm -f trace-mode-*.txt &&
+
-+ cp commit-graph-full .git/objects/info/commit-graph &&
-+ GIT_TRACE2_EVENT="$(pwd)/trace-full.txt" \
-+ git merge-base --all commit-9-9 commit-9-1 >actual &&
-+ test_trace2_data paint_down_to_common steps 80 <trace-full.txt &&
++ for mode in none full half no-gdat
++ do
++ rm -rf "$graph" "${graph}s" &&
++ cp "commit-graph-${mode}" "$graph" 2>/dev/null ||
++ true &&
++ GIT_TRACE2_EVENT="$(pwd)/trace-mode-${mode}.txt" \
++ "$@" <input >actual &&
++ test_cmp expect actual || return 1
++ done
+ }
+
+ test_all_modes () {
+ run_all_modes test-tool reach "$@"
+ }
+
++test_paint_down_steps () {
++ for mode in none full half no-gdat
++ do
++ test_trace2_data paint_down_to_common steps "$1" \
++ <"trace-mode-${mode}.txt" || return 1
++ shift
++ done
++}
+
-+ cp commit-graph-half .git/objects/info/commit-graph &&
-+ GIT_TRACE2_EVENT="$(pwd)/trace-half.txt" \
-+ git merge-base --all commit-9-9 commit-9-1 >actual &&
-+ test_trace2_data paint_down_to_common steps 81 <trace-half.txt
+ test_expect_success 'ref_newer:miss' '
+ cat >input <<-\EOF &&
+ A:commit-5-7
+@@ t/t6600-test-reach.sh: test_expect_success 'in_merge_bases_many:self' '
+ X:commit-6-8
+ EOF
+ echo "in_merge_bases_many(A,X):1" >expect &&
+- test_all_modes in_merge_bases_many
++ test_all_modes in_merge_bases_many &&
++ test_paint_down_steps 45 2 25 3
+ '
+
+ test_expect_success 'is_descendant_of:hit' '
+@@ t/t6600-test-reach.sh: test_expect_success 'get_merge_bases_many:pending-stale' '
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse ps-B
+ } >expect &&
+- test_all_modes get_merge_bases_many
++ test_all_modes get_merge_bases_many &&
++ test_paint_down_steps 6 6 6 6
+ '
+
+ test_expect_success 'get_merge_bases_many:infinity-both-sides' '
+@@ t/t6600-test-reach.sh: test_expect_success 'get_merge_bases_many:infinity-both-sides' '
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse pi-B
+ } >expect &&
+- test_all_modes get_merge_bases_many
++ test_all_modes get_merge_bases_many &&
++ test_paint_down_steps 5 5 5 5
+ '
+
+ test_expect_success 'setup mixed finite/INFINITY topology' '
+@@ t/t6600-test-reach.sh: test_expect_success 'get_merge_bases_many:mixed-finite-infinity' '
+ echo "get_merge_bases_many(A,X):" &&
+ git rev-parse ps-X
+ } >expect &&
+- test_all_modes get_merge_bases_many
++ test_all_modes get_merge_bases_many &&
++ test_paint_down_steps 3 3 3 3
+'
+
++test_expect_success 'merge-base --all commit-walk steps' '
++ >input &&
++ git rev-parse commit-9-1 >expect &&
++ run_all_modes git merge-base --all commit-9-9 commit-9-1 &&
++ test_paint_down_steps 81 80 81 81
+ '
+
test_expect_success 'reduce_heads' '
- cat >input <<-\EOF &&
- X:commit-1-10
5: f24edd45f0 ! 5: e82e0c72b6 commit-reach: introduce struct paint_state with per-side counters
@@ Commit message
(max_nonstale). This is equivalent behavior -- both conditions
detect that no non-stale entries remain.
- The existing nonstale_queue is left in place for ahead_behind().
+ paint_queue_get() uses a "pop first" form: it dequeues a commit,
+ then checks the counters. This means the loop exits one iteration
+ earlier than the old code in some topologies (the popped stale
+ commit is never processed), so a few step counts drop by one.
- Step counts (via trace2 from the previous commit) are identical
- before and after this refactoring, confirming no behavioral change.
+ The existing nonstale_queue is left in place for ahead_behind().
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
@@ commit-reach.c: static struct commit *nonstale_queue_get_dedup(struct nonstale_q
+
+static struct commit *paint_queue_get(struct paint_state *state)
+{
-+ struct commit *commit;
++ struct commit *commit = prio_queue_get(&state->queue);
++
++ if (!commit)
++ return NULL;
++
++ commit->object.flags &= ~ENQUEUED;
+
+ if (!state->p1_count && !state->p2_count &&
+ !state->pending_merge_bases)
+ return NULL;
+
-+ commit = prio_queue_get(&state->queue);
-+ if (commit) {
-+ commit->object.flags &= ~ENQUEUED;
-+ paint_count_update(state, commit->object.flags, -1);
-+ }
++ paint_count_update(state, commit->object.flags, -1);
+ return commit;
+}
+
@@ commit-reach.c: static int paint_down_to_common(struct repository *r,
trace2_data_intmax("paint_down_to_common", r,
"steps", steps);
commit_list_sort_by_date(result);
+
+ ## t/t6600-test-reach.sh ##
+@@ t/t6600-test-reach.sh: test_expect_success 'get_merge_bases_many:pending-stale' '
+ git rev-parse ps-B
+ } >expect &&
+ test_all_modes get_merge_bases_many &&
+- test_paint_down_steps 6 6 6 6
++ test_paint_down_steps 5 5 5 5
+ '
+
+ test_expect_success 'get_merge_bases_many:infinity-both-sides' '
+@@ t/t6600-test-reach.sh: test_expect_success 'get_merge_bases_many:infinity-both-sides' '
+ git rev-parse pi-B
+ } >expect &&
+ test_all_modes get_merge_bases_many &&
+- test_paint_down_steps 5 5 5 5
++ test_paint_down_steps 5 4 5 5
+ '
+
+ test_expect_success 'setup mixed finite/INFINITY topology' '
6: 8c72f01083 = 6: e6181bf3c1 commit-reach: remove unused nonstale_queue dedup wrappers
7: d84b932e5b ! 7: f3572a8a89 commit-reach: terminate merge-base walk when one paint side is exhausted
@@ Commit message
once the walk enters the finite-generation region where ordering
guarantees hold.
+ Widen the existing generation-monotonicity BUG assertion to fire
+ unconditionally, not only when min_generation is set. The
+ side-exhaustion optimization depends on correct generation ordering,
+ so the assertion should always be active.
+
Step counts measured with trace2 on git.git with commit-graph:
merge-base --all v2.0.0 v2.55.0-rc1:
@@ Documentation/technical/paint-down-to-common.adoc: existing candidates by provin
## commit-reach.c ##
@@ commit-reach.c: static void paint_queue_put(struct paint_state *state,
+ }
+ }
++/*
++ * Dequeue the next commit for the paint walk, or return NULL when
++ * no more merge bases can be discovered.
++ */
static struct commit *paint_queue_get(struct paint_state *state)
{
-- struct commit *commit;
-+ struct commit *commit = prio_queue_get(&state->queue);
+ struct commit *commit = prio_queue_get(&state->queue);
+@@ commit-reach.c: static struct commit *paint_queue_get(struct paint_state *state)
+
+ commit->object.flags &= ~ENQUEUED;
- if (!state->p1_count && !state->p2_count &&
- !state->pending_merge_bases)
-+ if (!commit)
- return NULL;
-
-- commit = prio_queue_get(&state->queue);
-- if (commit) {
-- commit->object.flags &= ~ENQUEUED;
-- paint_count_update(state, commit->object.flags, -1);
-+ commit->object.flags &= ~ENQUEUED;
-+
+- return NULL;
+ if (!state->pending_merge_bases) {
++ /* only stale entries remain */
+ if (!state->p1_count && !state->p2_count)
+ return NULL;
-+ /*
-+ * Side exhaustion: a new merge-base can only form
-+ * when both PARENT1-only and PARENT2-only commits
-+ * remain in the queue. In the finite-generation
-+ * region the queue is ordered topologically, so
-+ * no future step can add paint to visited commits
-+ * and an exhausted side cannot reappear.
-+ */
++
++ /* one side is exhausted */
+ if ((!state->p1_count || !state->p2_count) &&
+ commit_graph_generation(commit) < GENERATION_NUMBER_INFINITY)
+ return NULL;
- }
-+
-+ paint_count_update(state, commit->object.flags, -1);
++ }
+
+ paint_count_update(state, commit->object.flags, -1);
return commit;
- }
+@@ commit-reach.c: static int paint_down_to_common(struct repository *r,
+ timestamp_t generation = commit_graph_generation(commit);
+ steps++;
+- if (min_generation && generation > last_gen)
++ if (generation > last_gen)
+ BUG("bad generation skip %"PRItime" > %"PRItime" at %s",
+ generation, last_gen,
+ oid_to_hex(&commit->object.oid));
## t/t6600-test-reach.sh ##
-@@ t/t6600-test-reach.sh: test_expect_success 'merge-base --all commit-walk steps' '
- cp commit-graph-full .git/objects/info/commit-graph &&
- GIT_TRACE2_EVENT="$(pwd)/trace-full.txt" \
- git merge-base --all commit-9-9 commit-9-1 >actual &&
-- test_trace2_data paint_down_to_common steps 80 <trace-full.txt &&
-+ test_trace2_data paint_down_to_common steps 9 <trace-full.txt &&
+@@ t/t6600-test-reach.sh: test_expect_success 'in_merge_bases_many:self' '
+ EOF
+ echo "in_merge_bases_many(A,X):1" >expect &&
+ test_all_modes in_merge_bases_many &&
+- test_paint_down_steps 45 2 25 3
++ test_paint_down_steps 45 1 25 1
+ '
- cp commit-graph-half .git/objects/info/commit-graph &&
- GIT_TRACE2_EVENT="$(pwd)/trace-half.txt" \
- git merge-base --all commit-9-9 commit-9-1 >actual &&
-- test_trace2_data paint_down_to_common steps 81 <trace-half.txt
-+ test_trace2_data paint_down_to_common steps 57 <trace-half.txt
+ test_expect_success 'is_descendant_of:hit' '
+@@ t/t6600-test-reach.sh: test_expect_success 'merge-base --all commit-walk steps' '
+ >input &&
+ git rev-parse commit-9-1 >expect &&
+ run_all_modes git merge-base --all commit-9-9 commit-9-1 &&
+- test_paint_down_steps 81 80 81 81
++ test_paint_down_steps 81 9 57 10
'
test_expect_success 'reduce_heads' '
-: ---------- > 8: 4b9f192d98 commit-reach: move min_generation check into paint_queue_get()
--
gitgitgadget
^ permalink raw reply
* Re: [PATCH GSoC v14 05/13] fetch-pack: prepare function to be moved
From: Chandra Pratap @ 2026-06-26 12:14 UTC (permalink / raw)
To: Pablo Sabater
Cc: git, chriscool, eric.peijian, gitster, jltobler, karthik.188,
peff, toon, Jonathan Tan, Calvin Wan
In-Reply-To: <20260625-ps-eric-work-rebase-v14-5-09f7ffe21a53@gmail.com>
On Thu, 25 Jun 2026 at 17:43, Pablo Sabater <pabloosabaterr@gmail.com> wrote:
>
> `write_fetch_command_and_capabilities()` will be refactored and moved in
> subsequent commits where it will become a more general-purpose function,
> making it more accessible to additional commands in the future.
>
> To move `write_fetch_command_and_capabilities()` to `connect.c`, we
> previously need to adjust how `advertise_sid` is managed. Currently in
> `fetch_pack.c`, `advertise_sid` is a static variable, modified using
> `repo_config_get_bool()`.
>
> Initialize `advertise_sid` at the begining by directly using
> `repo_config_get_bool()`. This change is safe because:
>
> In the original `fetch-pack.c` code, there are only two places that write
> `advertise_sid`:
>
> 1. In function `do_fetch_pack()`:
> if (!server_supports("session_id"))
> advertise_sid = 0;
> 2. In function `fetch_pack_config()`:
> repo_config_get_bool("transfer.advertisesid", &advertise_sid);
>
> About 1, since `do_fetch_pack()` is only relevant for protocol v1, this
> assignment can be ignored, as `write_fetch_command_and_capabilities()`
> is only used in v2.
>
> About 2, `repo_config_get_bool()` is from `config.h` and it's an
> out-of-box dependency of `connect.c`, so we can reuse it directly.
Nit: This only explains the `advertise_sid` change in this patch. We should
also add a few lines explaining the `hash_algo` change. Maybe something
like:
While at it, change `hash_algo`'s type to `hash_algo_by_name()`'s actual
return type (`unsigned int`) and make it `const`.
> Helped-by: Jonathan Tan <jonathantanmy@google.com>
> Helped-by: Christian Couder <chriscool@tuxfamily.org>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> Signed-off-by: Eric Ju <eric.peijian@gmail.com>
> Signed-off-by: Pablo Sabater <pabloosabaterr@gmail.com>
> ---
> fetch-pack.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/fetch-pack.c b/fetch-pack.c
> index f13951d154..ad07603755 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -1380,6 +1380,9 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
> const struct string_list *server_options)
> {
> const char *hash_name;
> + int advertise_sid;
> +
> + repo_config_get_bool(the_repository, "transfer.advertisesid", &advertise_sid);
>
> ensure_server_supports_v2("fetch");
> packet_buf_write(req_buf, "command=fetch");
> @@ -1395,7 +1398,7 @@ static void write_fetch_command_and_capabilities(struct strbuf *req_buf,
> }
>
> if (server_feature_v2("object-format", &hash_name)) {
> - int hash_algo = hash_algo_by_name(hash_name);
> + const unsigned int hash_algo = hash_algo_by_name(hash_name);
> if (hash_algo_by_ptr(the_hash_algo) != hash_algo)
> die(_("mismatched algorithms: client %s; server %s"),
> the_hash_algo->name, hash_name);
>
> --
> 2.54.0
^ permalink raw reply
* Re: [PATCH GSoC v14 06/13] fetch-pack: move function to connect.c
From: Chandra Pratap @ 2026-06-26 12:14 UTC (permalink / raw)
To: Pablo Sabater
Cc: git, chriscool, eric.peijian, gitster, jltobler, karthik.188,
peff, toon, Jonathan Tan, Calvin Wan
In-Reply-To: <20260625-ps-eric-work-rebase-v14-6-09f7ffe21a53@gmail.com>
On Thu, 25 Jun 2026 at 17:43, Pablo Sabater <pabloosabaterr@gmail.com> wrote:
>
> write_fetch_command_and_capabilities will be refactored in a subsequent
Nit: The paragraph below and the preceding patches refer to this function
as `write_fetch_command_and_capabilities()`. It will be nice to maintain
consistency throughout this series.
> commit where it will become a more general-purpose function, making it
> more accessible to additional commands in the future.
>
> Move `write_fetch_command_and_capabilities()` to `connect.c`, where
> there are similar purpose functions.
>
> Because string_list is only used as a pointer, use a forward
> declaration [1].
>
> [1]: https://lore.kernel.org/git/Z0RIqUAoEob8lGfM@pks.im/
>
> Helped-by: Jonathan Tan <jonathantanmy@google.com>
> Helped-by: Christian Couder <chriscool@tuxfamily.org>
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> Signed-off-by: Eric Ju <eric.peijian@gmail.com>
> Signed-off-by: Pablo Sabater <pabloosabaterr@gmail.com>
> ---
[snip]
^ permalink raw reply
* Re: [PATCH 1/2] odb/source: generalize `reprepare()` callback
From: Toon Claes @ 2026-06-26 12:10 UTC (permalink / raw)
To: Patrick Steinhardt, git
In-Reply-To: <20260622-b4-pks-odb-generalize-prepare-v1-1-d2a5c5d13144@pks.im>
Patrick Steinhardt <ps@pks.im> writes:
> The `reprepare()` callback function can be used to flush caches of a
> given object source and then prepare it anew. This is for example used
> when a concurrent process may have written new objects. Ultimately, this
> can be seen as doing two separate steps:
>
> 1. We drop any caches.
>
> 2. We prepare the source.
>
> We have one callsite in git-grep(1) though that really only want to do
> (2). This is done by reaching into the "files" backend directly and then
> calling `odb_source_packed_prepare()`, which of course may not work with
> alternate backends.
>
> We could in theory just call `reprepare()` here, and that would likely
> not have any significant downside. But this would certainly feel like a
> code smell.
>
> Instead, generalize the `reprepare()` callback to `prepare()` with a
> flag that optionally instructs the backend to also flush the caches,
> which allows us to drop the external `odb_source_packed_prepare()`
> declaration.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> builtin/grep.c | 9 +++------
> midx.c | 2 +-
> odb.c | 2 +-
> odb.h | 8 ++++++++
> odb/source-files.c | 9 +++++----
> odb/source-inmemory.c | 5 +++--
> odb/source-loose.c | 8 +++++---
> odb/source-packed.c | 34 ++++++++++++++++------------------
> odb/source-packed.h | 9 ---------
> odb/source.h | 16 +++++++++-------
> packfile.c | 2 +-
> 11 files changed, 52 insertions(+), 52 deletions(-)
>
> diff --git a/builtin/grep.c b/builtin/grep.c
> index 8080d1bf5e..7361bf071e 100644
> --- a/builtin/grep.c
> +++ b/builtin/grep.c
> @@ -25,12 +25,11 @@
> #include "setup.h"
> #include "submodule.h"
> #include "submodule-config.h"
> -#include "object-file.h"
> #include "object-name.h"
> #include "odb.h"
> +#include "odb/source.h"
> #include "oid-array.h"
> #include "oidset.h"
> -#include "packfile.h"
> #include "pager.h"
> #include "path.h"
> #include "promisor-remote.h"
> @@ -1361,10 +1360,8 @@ int cmd_grep(int argc,
> struct odb_source *source;
>
> odb_prepare_alternates(the_repository->objects);
> - for (source = the_repository->objects->sources; source; source = source->next) {
> - struct odb_source_files *files = odb_source_files_downcast(source);
So you're downcasting inside the implementation by the backends itself.
That makes sense, but would it be worth to say something about that in
the commit message?
> - odb_source_packed_prepare(files->packed);
> - }
> + for (source = the_repository->objects->sources; source; source = source->next)
> + odb_source_prepare(source, 0);
> }
>
> start_threads(&opt);
> diff --git a/midx.c b/midx.c
> index cc6b94f9dd..76c3f92cc3 100644
> --- a/midx.c
> +++ b/midx.c
> @@ -101,7 +101,7 @@ static int midx_read_object_offsets(const unsigned char *chunk_start,
>
> struct multi_pack_index *get_multi_pack_index(struct odb_source_packed *source)
> {
> - odb_source_packed_prepare(source);
> + odb_source_prepare(&source->base, 0);
> return source->midx;
> }
>
> diff --git a/odb.c b/odb.c
> index 965ef68e4e..7b45390e12 100644
> --- a/odb.c
> +++ b/odb.c
> @@ -1086,7 +1086,7 @@ void odb_reprepare(struct object_database *o)
> odb_prepare_alternates(o);
>
> for (source = o->sources; source; source = source->next)
> - odb_source_reprepare(source);
> + odb_source_prepare(source, ODB_PREPARE_FLUSH_CACHES);
>
> o->object_count_valid = 0;
>
> diff --git a/odb.h b/odb.h
> index 0030467a52..c14c9030e4 100644
> --- a/odb.h
> +++ b/odb.h
> @@ -124,6 +124,14 @@ void odb_free(struct object_database *o);
> */
> void odb_close(struct object_database *o);
>
> +enum odb_prepare_flags {
> + /*
> + * Flush caches, reload alternates and then re-prepare each object
> + * source so that new objects may become accessible.
> + */
> + ODB_PREPARE_FLUSH_CACHES = (1 << 0),
> +};
> +
> /*
> * Clear caches, reload alternates and then reload object sources so that new
> * objects may become accessible.
> diff --git a/odb/source-files.c b/odb/source-files.c
> index 3bc6419dd7..ad9e0b52f9 100644
> --- a/odb/source-files.c
> +++ b/odb/source-files.c
> @@ -41,11 +41,12 @@ static void odb_source_files_close(struct odb_source *source)
> odb_source_close(&files->packed->base);
> }
>
> -static void odb_source_files_reprepare(struct odb_source *source)
> +static void odb_source_files_prepare(struct odb_source *source,
> + enum odb_prepare_flags flags)
> {
> struct odb_source_files *files = odb_source_files_downcast(source);
> - odb_source_reprepare(&files->loose->base);
> - odb_source_reprepare(&files->packed->base);
> + odb_source_prepare(&files->loose->base, flags);
> + odb_source_prepare(&files->packed->base, flags);
> }
>
> static int odb_source_files_read_object_info(struct odb_source *source,
> @@ -273,7 +274,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
>
> files->base.free = odb_source_files_free;
> files->base.close = odb_source_files_close;
> - files->base.reprepare = odb_source_files_reprepare;
> + files->base.prepare = odb_source_files_prepare;
> files->base.read_object_info = odb_source_files_read_object_info;
> files->base.read_object_stream = odb_source_files_read_object_stream;
> files->base.for_each_object = odb_source_files_for_each_object;
> diff --git a/odb/source-inmemory.c b/odb/source-inmemory.c
> index e004566d76..cc5e9e62cb 100644
> --- a/odb/source-inmemory.c
> +++ b/odb/source-inmemory.c
> @@ -325,7 +325,8 @@ static void odb_source_inmemory_close(struct odb_source *source UNUSED)
> {
> }
>
> -static void odb_source_inmemory_reprepare(struct odb_source *source UNUSED)
> +static void odb_source_inmemory_prepare(struct odb_source *source UNUSED,
> + enum odb_prepare_flags flags UNUSED)
> {
> }
>
> @@ -365,7 +366,7 @@ struct odb_source_inmemory *odb_source_inmemory_new(struct object_database *odb)
>
> source->base.free = odb_source_inmemory_free;
> source->base.close = odb_source_inmemory_close;
> - source->base.reprepare = odb_source_inmemory_reprepare;
> + source->base.prepare = odb_source_inmemory_prepare;
> source->base.read_object_info = odb_source_inmemory_read_object_info;
> source->base.read_object_stream = odb_source_inmemory_read_object_stream;
> source->base.for_each_object = odb_source_inmemory_for_each_object;
> diff --git a/odb/source-loose.c b/odb/source-loose.c
> index 7d7ea2fb84..af46316e35 100644
> --- a/odb/source-loose.c
> +++ b/odb/source-loose.c
> @@ -672,10 +672,12 @@ static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
> sizeof(loose->subdir_seen));
> }
>
> -static void odb_source_loose_reprepare(struct odb_source *source)
> +static void odb_source_loose_prepare(struct odb_source *source,
> + enum odb_prepare_flags flags)
> {
> struct odb_source_loose *loose = odb_source_loose_downcast(source);
> - odb_source_loose_clear_cache(loose);
> + if (flags & ODB_PREPARE_FLUSH_CACHES)
> + odb_source_loose_clear_cache(loose);
> }
>
> static void odb_source_loose_close(struct odb_source *source UNUSED)
> @@ -716,7 +718,7 @@ struct odb_source_loose *odb_source_loose_new(struct object_database *odb,
>
> loose->base.free = odb_source_loose_free;
> loose->base.close = odb_source_loose_close;
> - loose->base.reprepare = odb_source_loose_reprepare;
> + loose->base.prepare = odb_source_loose_prepare;
> loose->base.read_object_info = odb_source_loose_read_object_info;
> loose->base.read_object_stream = odb_source_loose_read_object_stream;
> loose->base.for_each_object = odb_source_loose_for_each_object;
> diff --git a/odb/source-packed.c b/odb/source-packed.c
> index 42c28fba0e..fa5a072488 100644
> --- a/odb/source-packed.c
> +++ b/odb/source-packed.c
> @@ -15,7 +15,7 @@ static int find_pack_entry(struct odb_source_packed *store,
> {
> struct packfile_list_entry *l;
>
> - odb_source_packed_prepare(store);
> + odb_source_prepare(&store->base, 0);
Why are you not using ODB_PREPARE_FLUSH_CACHES here? It used to do
before?
> if (store->midx && fill_midx_entry(store->midx, oid, e))
> return 1;
>
> @@ -47,7 +47,7 @@ static int odb_source_packed_read_object_info(struct odb_source *source,
> * been added since the last time we have prepared the packfile store.
> */
> if (flags & OBJECT_INFO_SECOND_READ)
> - odb_source_reprepare(source);
> + odb_source_prepare(source, ODB_PREPARE_FLUSH_CACHES);
I think the new code is correct, but why wasn't `packed` used here in
the past? The old odb_source_reprepare() expected a downcasted, didn't
it?
>
> if (!find_pack_entry(packed, oid, &e))
> return 1;
> @@ -668,27 +668,25 @@ static int sort_pack(const struct packfile_list_entry *a,
> return -1;
> }
>
> -void odb_source_packed_prepare(struct odb_source_packed *source)
> +static void odb_source_packed_prepare(struct odb_source *source,
> + enum odb_prepare_flags flags)
> {
> - if (source->initialized)
> + struct odb_source_packed *packed = odb_source_packed_downcast(source);
> +
> + if (flags & ODB_PREPARE_FLUSH_CACHES)
> + packed->initialized = false;
> + if (packed->initialized)
> return;
>
> - prepare_multi_pack_index_one(source);
> - prepare_packed_git_one(source);
> + prepare_multi_pack_index_one(packed);
> + prepare_packed_git_one(packed);
>
> - sort_packs(&source->packs.head, sort_pack);
> - for (struct packfile_list_entry *e = source->packs.head; e; e = e->next)
> + sort_packs(&packed->packs.head, sort_pack);
> + for (struct packfile_list_entry *e = packed->packs.head; e; e = e->next)
> if (!e->next)
> - source->packs.tail = e;
> + packed->packs.tail = e;
>
> - source->initialized = true;
> -}
> -
> -static void odb_source_packed_reprepare(struct odb_source *source)
> -{
> - struct odb_source_packed *packed = odb_source_packed_downcast(source);
> - packed->initialized = false;
> - odb_source_packed_prepare(packed);
> + packed->initialized = true;
> }
>
> static void odb_source_packed_reparent(const char *name UNUSED,
> @@ -744,7 +742,7 @@ struct odb_source_packed *odb_source_packed_new(struct object_database *odb,
>
> packed->base.free = odb_source_packed_free;
> packed->base.close = odb_source_packed_close;
> - packed->base.reprepare = odb_source_packed_reprepare;
> + packed->base.prepare = odb_source_packed_prepare;
> packed->base.read_object_info = odb_source_packed_read_object_info;
> packed->base.read_object_stream = odb_source_packed_read_object_stream;
> packed->base.for_each_object = odb_source_packed_for_each_object;
> diff --git a/odb/source-packed.h b/odb/source-packed.h
> index 88994098c1..d5230ac68c 100644
> --- a/odb/source-packed.h
> +++ b/odb/source-packed.h
> @@ -82,13 +82,4 @@ static inline struct odb_source_packed *odb_source_packed_downcast(struct odb_so
> return container_of(source, struct odb_source_packed, base);
> }
>
> -/*
> - * Prepare the source by loading packfiles and multi-pack indices for
> - * all alternates. This becomes a no-op if the source is already prepared.
> - *
> - * It shouldn't typically be necessary to call this function directly, as
> - * functions that access the source know to prepare it.
> - */
> -void odb_source_packed_prepare(struct odb_source_packed *source);
> -
> #endif
> diff --git a/odb/source.h b/odb/source.h
> index b9a7642b2c..bbf1da3819 100644
> --- a/odb/source.h
> +++ b/odb/source.h
> @@ -83,11 +83,12 @@ struct odb_source {
> void (*close)(struct odb_source *source);
>
> /*
> - * This callback is expected to clear underlying caches of the object
> - * database source. The function is called when the repository has for
> - * example just been repacked so that new objects will become visible.
> + * This callback is expected to prepare the source so that it becomes
> + * ready for use. It optionally clears underlying caches of the object
> + * database source.
> */
> - void (*reprepare)(struct odb_source *source);
> + void (*prepare)(struct odb_source *source,
> + enum odb_prepare_flags flags);
>
> /*
> * This callback is expected to read object information from the object
> @@ -308,13 +309,14 @@ static inline void odb_source_close(struct odb_source *source)
> }
>
> /*
> - * Reprepare the object database source and clear any caches. Depending on the
> + * Prepare the object database source and clear any caches. Depending on the
> * backend used this may have the effect that concurrently-written objects
> * become visible.
> */
> -static inline void odb_source_reprepare(struct odb_source *source)
> +static inline void odb_source_prepare(struct odb_source *source,
> + enum odb_prepare_flags flags)
> {
> - source->reprepare(source);
> + source->prepare(source, flags);
> }
>
> /*
> diff --git a/packfile.c b/packfile.c
> index 59cee7925d..d78fae981a 100644
> --- a/packfile.c
> +++ b/packfile.c
> @@ -855,7 +855,7 @@ void for_each_file_in_pack_dir(const char *objdir,
>
> struct packfile_list_entry *packfile_store_get_packs(struct odb_source_packed *store)
> {
> - odb_source_packed_prepare(store);
> + odb_source_prepare(&store->base, 0);
>
> if (store->midx) {
> struct multi_pack_index *m = store->midx;
>
> --
> 2.55.0.rc1.745.g43192e7977.dirty
>
>
--
Cheers,
Toon
^ permalink raw reply
* Re: [PATCH 2/2] odb: introduce `odb_prepare()`
From: Toon Claes @ 2026-06-26 12:09 UTC (permalink / raw)
To: Patrick Steinhardt, git
In-Reply-To: <20260622-b4-pks-odb-generalize-prepare-v1-2-d2a5c5d13144@pks.im>
Patrick Steinhardt <ps@pks.im> writes:
> Introduce `odb_prepare()` as a simple wrapper to prepare alternates and
> then prepare each individual source. Adapt git-grep(1) to use it.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> builtin/grep.c | 9 ++-------
> odb.c | 18 ++++++++++++------
> odb.h | 8 ++++++--
> 3 files changed, 20 insertions(+), 15 deletions(-)
>
> diff --git a/builtin/grep.c b/builtin/grep.c
> index 7361bf071e..a7252d56a1 100644
> --- a/builtin/grep.c
> +++ b/builtin/grep.c
> @@ -1356,13 +1356,8 @@ int cmd_grep(int argc,
> if (recurse_submodules)
> repo_read_gitmodules(the_repository, 1);
>
> - if (startup_info->have_repository) {
> - struct odb_source *source;
> -
> - odb_prepare_alternates(the_repository->objects);
> - for (source = the_repository->objects->sources; source; source = source->next)
> - odb_source_prepare(source, 0);
> - }
> + if (startup_info->have_repository)
> + odb_prepare(the_repository->objects, 0);
>
> start_threads(&opt);
> } else {
> diff --git a/odb.c b/odb.c
> index 7b45390e12..11414c49a8 100644
> --- a/odb.c
> +++ b/odb.c
> @@ -1070,7 +1070,7 @@ void odb_free(struct object_database *o)
> free(o);
> }
>
> -void odb_reprepare(struct object_database *o)
> +void odb_prepare(struct object_database *o, enum odb_prepare_flags flags)
> {
> struct odb_source *source;
>
> @@ -1082,13 +1082,19 @@ void odb_reprepare(struct object_database *o)
> * the linked list, so existing odbs will continue to exist for
> * the lifetime of the process.
> */
> - o->loaded_alternates = 0;
> - odb_prepare_alternates(o);
> + if (flags & ODB_PREPARE_FLUSH_CACHES) {
> + o->loaded_alternates = 0;
> + o->object_count_valid = 0;
> + }
>
> + odb_prepare_alternates(o);
> for (source = o->sources; source; source = source->next)
> - odb_source_prepare(source, ODB_PREPARE_FLUSH_CACHES);
> -
> - o->object_count_valid = 0;
> + odb_source_prepare(source, flags);
>
> obj_read_unlock();
> }
> +
> +void odb_reprepare(struct object_database *o)
> +{
> + odb_prepare(o, ODB_PREPARE_FLUSH_CACHES);
> +}
> diff --git a/odb.h b/odb.h
> index c14c9030e4..b1c0f3767b 100644
> --- a/odb.h
> +++ b/odb.h
> @@ -133,9 +133,13 @@ enum odb_prepare_flags {
> };
>
> /*
> - * Clear caches, reload alternates and then reload object sources so that new
> - * objects may become accessible.
> + * Prepare the object database for use. Calling this function is generally not
> + * needed, but can be useful in case the caller wants to pre-open individual
> + * sources.
> */
> +void odb_prepare(struct object_database *o, enum odb_prepare_flags flags);
> +
> +/* Equivalent to `odb_prepare(o, ODB_PREPARE_FLUSH_CACHES)`. */
> void odb_reprepare(struct object_database *o);
According to my grep results are there 17 callsites for odb_reprepare(),
then I agree it makes sense to create this wrapper.
--
Cheers,
Toon
^ permalink raw reply
* Re: [PATCH GSoC v14 02/13] git-compat-util: add strtoul_szt() with error handling
From: Pablo Sabater @ 2026-06-26 12:00 UTC (permalink / raw)
To: Junio C Hamano
Cc: git, chandrapratap3519, chriscool, eric.peijian, jltobler,
karthik.188, peff, toon
In-Reply-To: <xmqqjyrme393.fsf@gitster.g>
El jue, 25 jun 2026 a las 23:09, Junio C Hamano (<gitster@pobox.com>) escribió:
>
> Pablo Sabater <pabloosabaterr@gmail.com> writes:
>
> > From: Eric Ju <eric.peijian@gmail.com>
> >
> > We already have strtoul_ui() and similar functions that provide proper
> > error handling using strtoul from the standard library. However,
> > there isn't currently a variant that returns an unsigned long.
>
> But this one no longer returns an unsigned long anymore ;-)
True, I missed that.
>
> > This variant is needed in a subsequent commit to enable returning an
> > size_t with proper error handling.
>
> I think it would allow a lot of code paths that want to deal with
> size_t not to worry about "is ulong large enough?" to have a
> function like this, but for that to happen, the implementation of
> the function must carefully think through if these steps do sensible
> things on platforms with too small ulong (which often is OK when we
> are coming from decimal string to ulong and then to size_t) and too
> large ulong (which is not OK, when coming from decimal string to
> ulong which might be fine, but will bust the size of the final
> type), etc.
Ok, so the strtoul_szt() is not a bad idea but it is not ok how I did
it. What about using uintmax_t and strtoumax() and after that (before
casting) check if it fits into a size_t?
Something like:
static inline int strtoumax_szt(char const *s, int base, size_t *result)
{
uintmax_t val;
char *p;
errno = 0;
/* negative values would be accepted by strtoul */
if (strchr(s, '-'))
return -1;
val = strtoumax(s, &p, base);
if ((errno || *p || p == s) || val > SIZE_MAX)
return -1;
*result = val;
return 0;
}
Alternatively I could go back to the unsigned long version and make
the relevant checks on the caller which is only one place at
`fetch_object_info()`
>
> Also, would it make sense to add yet another "static inline" like
> this? After the dust settles, we may want to rethink these strtoX
> wrappers we have, benchmark, and possibly make them into a proper
> library function, not "static inline" that may bloat the runtime.
Yeah, I thought the same, I kept it on the header also to avoid
bloating this series too much because it's kinda big already. But a
follow-up after would be a good idea tho.
>
> > diff --git a/git-compat-util.h b/git-compat-util.h
> > index 8809776407..7f417f1acf 100644
> > --- a/git-compat-util.h
> > +++ b/git-compat-util.h
> > @@ -975,6 +975,26 @@ static inline int strtoul_ui(char const *s, int base, unsigned int *result)
> > return 0;
> > }
> >
> > +/*
> > + * Convert a string to a size_t using the standard library's strtoul, with
> > + * additional error handling to ensure robustness.
> > + */
> > +static inline int strtoul_szt(char const *s, int base, size_t *result)
> > +{
> > + unsigned long ul;
> > + char *p;
> > +
> > + errno = 0;
> > + /* negative values would be accepted by strtoul */
> > + if (strchr(s, '-'))
> > + return -1;
> > + ul = strtoul(s, &p, base);
> > + if (errno || *p || p == s)
> > + return -1;
> > + *result = ul;
> > + return 0;
> > +}
> > +
> > static inline int strtol_i(char const *s, int base, int *result)
> > {
> > long ul;
Thanks for the review,
Pablo.
^ permalink raw reply
* Re: [RFH] Why do osx CI jobs so unreliable?
From: Patrick Steinhardt @ 2026-06-26 10:50 UTC (permalink / raw)
To: Jeff King; +Cc: Michael Montalbo, git, Junio C Hamano
In-Reply-To: <20260626051657.GB3138423@coredump.intra.peff.net>
On Fri, Jun 26, 2026 at 01:16:57AM -0400, Jeff King wrote:
> On Thu, Jun 25, 2026 at 08:27:35PM -0700, Michael Montalbo wrote:
>
> > I think that is the trigger for issues we've been seeing. I spent
> > some time investigating the Apache side over the last week and maybe
> > found a mod_http2 bug, which I filed upstream with a potential fix:
> >
> > bug: https://bz.apache.org/bugzilla/show_bug.cgi?id=70131
> > fix: https://github.com/mmontalbo/httpd/pull/2
>
> Thanks both of you for digging into this. I'm not familiar enough with
> Apache's code to pass confident judgement, but your findings certainly
> convinced me that this is just an apache bug.
The bug manifests both with HTTP/1.1 and HTTP/2 though, so this wouldn't
fully fix the flakes we see, right?
> > Given there could be a potential reliability issue with an upstream
> > dependency like Apache, I was considering what mitigation strategies
> > might help:
> > [...]
>
> Depending on how widespread the Apache bug is, another option might just
> be: do nothing and wait for it to get fixed.
>
> Trying to make the wedged state fail fast and loudly is mostly just
> punting on the problem. We'd still see spurious failures. We've so far
> resisted the urge to do any automatic flaky-test retries, preferring
> instead to just try to root out the flakes. I'm a little hesitant to
> start now, because I think our strategy has mostly been good so far, and
> I've seen some horrible counter-examples where flakes and retries become
> a routine drag on development (and I'm afraid that accommodating flakes
> might make them more common).
I agree. I'm not a fan of retry logic, as every flaky test may mask an
actual bug that we haven't fully investigated yet.
> > - Make slow tests faster by optimizing the test itself and/or
> > the test runner configuration (e.g., job number matching
> > cores) so wedges become less likely.
>
> It sounds like the bad state is triggered when Apache hits a timeout,
> and we hit that timeout because the system is slow or busy. We could try
> to make things less slow, but would it work equally well to increase
> that timeout?
I was also wondering whether we can maybe work around the issue by
increasing the Apache timeout value. That sounds like an easy potential
solution to try, and from all we've discovered so far it doesn't feel
like this is something we can address on the Git side.
Patrick
^ permalink raw reply
* Re: [PATCH v5 0/4] history: add squash subcommand to fold a range
From: Harald Nordgren @ 2026-06-26 9:57 UTC (permalink / raw)
To: phillip.wood; +Cc: Harald Nordgren via GitGitGadget, git, Patrick Steinhardt
In-Reply-To: <d37e8f4f-d1f9-45aa-8c95-ebe676d54671@gmail.com>
On Fri, Jun 26, 2026 at 10:53 AM Phillip Wood <phillip.wood123@gmail.com> wrote:
>
> Hi Harald
>
> On 24/06/2026 22:54, Harald Nordgren via GitGitGadget wrote:
> > Adds git history squash <revision-range> to fold a range of commits.
>
> It would be helpful to give a bit more detail here about the command so
> that the reader has an overview of what is actually being implemented.
>
> - what does it do with fixup!, squash! and amend! commits? Can it use
> the message from amend! commits to reword the commit?
> - can the user reword the commit message?
> - what happens if a merge commit inside the range has a parent outside
> the range?
> - what happens to branches that point to commits inside the range?
>
> I had a quick play and found that it accepts ranges that containing a
> single commit (e.g. @^!) where there is nothing to squash. It also
> accepts ranges that are not ancestors of HEAD (e.g. checkout master and
> run "git history squash --dry-run origin/seen^2^!") without printing an
> error message.
Good points, I will take a look at clarifying or giving an error (like
in the case of ancestor not in history of HEAD).
Only accepting a single argument is quite limiting as one
> cannot say
>
> git history squash ^:/base :/tip
I don't understand why this is limiting? It thought it was clear that
it should be one argument REF1..REF2 ? What does '^:/base :/tip'
achieve that '^:/base..:/tip' cannot?
Harald
^ permalink raw reply
* Re: [PATCH v5 0/4] history: add squash subcommand to fold a range
From: Phillip Wood @ 2026-06-26 8:52 UTC (permalink / raw)
To: Harald Nordgren via GitGitGadget, git; +Cc: Harald Nordgren, Patrick Steinhardt
In-Reply-To: <pull.2337.v5.git.git.1782338102.gitgitgadget@gmail.com>
Hi Harald
On 24/06/2026 22:54, Harald Nordgren via GitGitGadget wrote:
> Adds git history squash <revision-range> to fold a range of commits.
It would be helpful to give a bit more detail here about the command so
that the reader has an overview of what is actually being implemented.
- what does it do with fixup!, squash! and amend! commits? Can it use
the message from amend! commits to reword the commit?
- can the user reword the commit message?
- what happens if a merge commit inside the range has a parent outside
the range?
- what happens to branches that point to commits inside the range?
I had a quick play and found that it accepts ranges that containing a
single commit (e.g. @^!) where there is nothing to squash. It also
accepts ranges that are not ancestors of HEAD (e.g. checkout master and
run "git history squash --dry-run origin/seen^2^!") without printing an
error message. Only accepting a single argument is quite limiting as one
cannot say
git history squash ^:/base :/tip
Thanks
Phillip
> Changes in v5:
>
> * The range walk now uses --ancestry-path, so only commits descended from
> the base are folded; a single revision such as HEAD or HEAD~1 is now
> rejected as "not a <base>..<tip> range" rather than treated as a squash
> down to the root.
> * This adopts the --ancestry-path suggestion; the multi-base rejection is
> unchanged, so a side branch that forked before the base and merged in is
> still refused.
> * Added tests covering more merge topologies: two interior merges, a nested
> merge, an octopus merge, an octopus arm forked before the base, a merge
> among the descendants replayed above the range, and a ref pointing at an
> interior merge commit.
>
> Changes in v4:
>
> * git history squash now detects when another ref points at a commit inside
> the range being folded and refuses, with an advice.historyUpdateRefs hint
> to use --update-refs=head.
> * A merge inside the range is folded fine as long as the range has a single
> base; a range with merge commit at the tip or base also folds correctly.
> Only a range with more than one base is rejected.
>
> Changes in v3:
>
> * Moved the feature out of git rebase and into a new git history squash
> <revision-range> subcommand, per the list discussion. git rebase --squash
> is dropped.
> * Takes an arbitrary range (git history squash @~3.., git history squash
> @~5..@~2), folding it into the oldest commit and replaying any
> descendants on top.
> * Implemented as a single tree operation rather than picking each commit,
> so there are no repeated conflict stops (addresses Phillip's efficiency
> point).
> * A merge inside the range is folded fine, only a range with more than one
> base is rejected.
> * --reedit-message seeds the editor with every folded-in message, not just
> the oldest.
>
> Harald Nordgren (4):
> history: extract helper for a commit's parent tree
> history: give commit_tree_ext a message template
> history: add squash subcommand to fold a range
> history: re-edit a squash with every message
>
> Documentation/config/advice.adoc | 4 +
> Documentation/git-history.adoc | 26 ++
> advice.c | 1 +
> advice.h | 1 +
> builtin/history.c | 341 ++++++++++++++++++---
> t/meson.build | 1 +
> t/t3455-history-squash.sh | 497 +++++++++++++++++++++++++++++++
> 7 files changed, 833 insertions(+), 38 deletions(-)
> create mode 100755 t/t3455-history-squash.sh
>
>
> base-commit: 26d8d94e94df5535eecd036f16627493506a0614
> Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-2337%2FHaraldNordgren%2Frebase-fixup-fold-v5
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-2337/HaraldNordgren/rebase-fixup-fold-v5
> Pull-Request: https://github.com/git/git/pull/2337
>
> Range-diff vs v4:
>
> 1: fc2801c0b1 = 1: 0f1ae9b05a history: extract helper for a commit's parent tree
> 2: ee591e83b4 = 2: a97ffab1e6 history: give commit_tree_ext a message template
> 3: 80bfea642e ! 3: 04e18ef979 history: add squash subcommand to fold a range
> @@ builtin/history.c: out:
> + struct rev_info revs;
> + struct commit *commit, *base = NULL, *oldest = NULL, *tip = NULL;
> + struct strvec args = STRVEC_INIT;
> ++ size_t i;
> + int ret;
> +
> + repo_init_revisions(repo, &revs, NULL);
> @@ builtin/history.c: out:
> + strvec_push(&args, "--reverse");
> + strvec_push(&args, "--topo-order");
> + strvec_push(&args, "--boundary");
> ++ strvec_push(&args, "--ancestry-path");
> + strvec_push(&args, range);
> + setup_revisions_from_strvec(&args, &revs, NULL);
> + if (args.nr != 1) {
> @@ builtin/history.c: out:
> + goto out;
> + }
> +
> ++ /*
> ++ * A squash needs a base to reparent onto, so the argument has to
> ++ * exclude something, as in "<base>..<tip>". A single revision has no
> ++ * such bottom commit and cannot be squashed.
> ++ */
> ++ for (i = 0; i < revs.cmdline.nr; i++)
> ++ if (revs.cmdline.rev[i].flags & UNINTERESTING)
> ++ break;
> ++ if (i == revs.cmdline.nr) {
> ++ ret = error(_("'%s' is not a '<base>..<tip>' range"), range);
> ++ goto out;
> ++ }
> ++
> + if (prepare_revision_walk(&revs) < 0) {
> + ret = error(_("error preparing revisions"));
> + goto out;
> @@ builtin/history.c: out:
> + goto out;
> + }
> +
> -+ if (!base) {
> -+ ret = error(_("cannot squash the root commit"));
> -+ goto out;
> -+ }
> ++ if (!base)
> ++ BUG("a non-empty range must have a boundary commit");
> +
> + *base_out = base;
> + *oldest_out = oldest;
> @@ t/t3455-history-squash.sh (new)
> + test_grep "the range .* is empty" err
> +'
> +
> -+test_expect_success 'errors when the range includes the root commit' '
> ++test_expect_success 'errors on a single revision that is not a range' '
> + test_must_fail git history squash HEAD 2>err &&
> -+ test_grep "cannot squash the root commit" err
> ++ test_grep "is not a .*range" err &&
> ++ test_must_fail git history squash HEAD~1 2>err &&
> ++ test_grep "is not a .*range" err
> +'
> +
> +test_expect_success 'squashes a range into a single commit without changing the tree' '
> @@ t/t3455-history-squash.sh (new)
> + test_path_is_file inner
> +'
> +
> ++test_expect_success 'folds a merge of a branch that forked at the base' '
> ++ git reset --hard start &&
> ++ git checkout -b base-fork-side &&
> ++ test_commit --no-tag base-fork-side side x &&
> ++ git checkout - &&
> ++ test_commit --no-tag base-fork-main file b &&
> ++ git merge --no-ff -m "merge base-fork-side" base-fork-side &&
> ++ git branch -D base-fork-side &&
> ++ test_commit --no-tag base-fork-tail file c &&
> ++ tip_tree=$(git rev-parse HEAD^{tree}) &&
> ++
> ++ git history squash start.. &&
> ++
> ++ git rev-list --count start..HEAD >count &&
> ++ echo 1 >expect &&
> ++ test_cmp expect count &&
> ++ test_cmp_rev start HEAD^ &&
> ++ test "$tip_tree" = "$(git rev-parse HEAD^{tree})" &&
> ++ test_path_is_file side
> ++'
> ++
> +test_expect_success 'folds a range whose tip is a merge commit' '
> + git reset --hard start &&
> + test_commit --no-tag tipmerge-base file b &&
> @@ t/t3455-history-squash.sh (new)
> + test_cmp_rev "$merged" HEAD
> +'
> +
> ++test_expect_success 'folds a range with two interior merges' '
> ++ git reset --hard start &&
> ++ test_commit --no-tag two-merge-a file a1 &&
> ++ git checkout -b two-merge-s1 &&
> ++ test_commit --no-tag two-merge-s1 s1 x &&
> ++ git checkout - &&
> ++ git merge --no-ff -m "merge s1" two-merge-s1 &&
> ++ test_commit --no-tag two-merge-b file b1 &&
> ++ git checkout -b two-merge-s2 &&
> ++ test_commit --no-tag two-merge-s2 s2 y &&
> ++ git checkout - &&
> ++ git merge --no-ff -m "merge s2" two-merge-s2 &&
> ++ git branch -D two-merge-s1 two-merge-s2 &&
> ++ tip_tree=$(git rev-parse HEAD^{tree}) &&
> ++
> ++ git history squash start.. &&
> ++
> ++ git rev-list --count start..HEAD >count &&
> ++ echo 1 >expect &&
> ++ test_cmp expect count &&
> ++ test "$tip_tree" = "$(git rev-parse HEAD^{tree})" &&
> ++ test_path_is_file s1 &&
> ++ test_path_is_file s2
> ++'
> ++
> ++test_expect_success 'folds a range with a nested merge' '
> ++ git reset --hard start &&
> ++ main=$(git symbolic-ref --short HEAD) &&
> ++ git checkout -b nested-outer &&
> ++ test_commit --no-tag nested-outer outer x &&
> ++ git checkout -b nested-inner &&
> ++ test_commit --no-tag nested-inner inner y &&
> ++ git checkout nested-outer &&
> ++ git merge --no-ff -m "merge inner" nested-inner &&
> ++ git checkout "$main" &&
> ++ test_commit --no-tag nested-main file b1 &&
> ++ git merge --no-ff -m "merge outer" nested-outer &&
> ++ git branch -D nested-outer nested-inner &&
> ++ tip_tree=$(git rev-parse HEAD^{tree}) &&
> ++
> ++ git history squash start.. &&
> ++
> ++ git rev-list --count start..HEAD >count &&
> ++ echo 1 >expect &&
> ++ test_cmp expect count &&
> ++ test "$tip_tree" = "$(git rev-parse HEAD^{tree})" &&
> ++ test_path_is_file outer &&
> ++ test_path_is_file inner
> ++'
> ++
> ++test_expect_success 'folds a range with an octopus merge' '
> ++ git reset --hard start &&
> ++ main=$(git symbolic-ref --short HEAD) &&
> ++ test_commit --no-tag octo-base file a1 &&
> ++ git checkout -b octo-1 &&
> ++ test_commit --no-tag octo-1 o1 x &&
> ++ git checkout "$main" &&
> ++ git checkout -b octo-2 &&
> ++ test_commit --no-tag octo-2 o2 y &&
> ++ git checkout "$main" &&
> ++ git merge --no-ff -m octopus octo-1 octo-2 &&
> ++ git branch -D octo-1 octo-2 &&
> ++ tip_tree=$(git rev-parse HEAD^{tree}) &&
> ++
> ++ git history squash start.. &&
> ++
> ++ git rev-list --count start..HEAD >count &&
> ++ echo 1 >expect &&
> ++ test_cmp expect count &&
> ++ test "$tip_tree" = "$(git rev-parse HEAD^{tree})" &&
> ++ test_path_is_file o1 &&
> ++ test_path_is_file o2
> ++'
> ++
> ++test_expect_success 'refuses an octopus merge with an arm forked before the base' '
> ++ git reset --hard start &&
> ++ main=$(git symbolic-ref --short HEAD) &&
> ++ git checkout -b octo-pre &&
> ++ test_commit octo-pre-side pside x &&
> ++ git checkout "$main" &&
> ++ test_commit octo-pre-main file b1 &&
> ++ octo_base=$(git rev-parse HEAD) &&
> ++ git checkout -b octo-within &&
> ++ test_commit --no-tag octo-within wside y &&
> ++ git checkout "$main" &&
> ++ git merge --no-ff -m octopus octo-pre octo-within &&
> ++ merged=$(git rev-parse HEAD) &&
> ++ git branch -D octo-pre octo-within &&
> ++
> ++ test_must_fail git history squash "$octo_base.." 2>err &&
> ++ test_grep "more than one base" err &&
> ++ test_cmp_rev "$merged" HEAD
> ++'
> ++
> ++test_expect_success 'refuses when a descendant above the range is a merge' '
> ++ git reset --hard start &&
> ++ main=$(git symbolic-ref --short HEAD) &&
> ++ test_commit --no-tag desc-base file b &&
> ++ git tag desc-tip &&
> ++ git checkout -b desc-above &&
> ++ test_commit --no-tag desc-above above x &&
> ++ git checkout "$main" &&
> ++ test_commit --no-tag desc-main file c &&
> ++ git merge --no-ff -m "merge desc-above" desc-above &&
> ++ git branch -D desc-above &&
> ++ head_before=$(git rev-parse HEAD) &&
> ++
> ++ test_must_fail git history squash start..desc-tip 2>err &&
> ++ test_grep "merge commits is not supported" err &&
> ++ test_cmp_rev "$head_before" HEAD
> ++'
> ++
> ++test_expect_success 'refuses to fold a range a ref points into at a merge' '
> ++ git reset --hard start &&
> ++ main=$(git symbolic-ref --short HEAD) &&
> ++ test_commit --no-tag refmerge-base file b &&
> ++ git checkout -b refmerge-side &&
> ++ test_commit --no-tag refmerge-side side x &&
> ++ git checkout "$main" &&
> ++ test_commit --no-tag refmerge-main file c &&
> ++ git merge --no-ff -m "interior merge" refmerge-side &&
> ++ git branch -D refmerge-side &&
> ++ git branch at-merge HEAD &&
> ++ test_commit --no-tag refmerge-tail file d &&
> ++ head_before=$(git rev-parse HEAD) &&
> ++
> ++ test_must_fail git history squash start.. 2>err &&
> ++ test_grep "at-merge" err &&
> ++ test_grep "points into the squashed range" err &&
> ++ test_cmp_rev "$head_before" HEAD &&
> ++
> ++ git branch -D at-merge
> ++'
> ++
> +test_done
> 4: 85c7817d7e = 4: a758e1f084 history: re-edit a squash with every message
>
^ permalink raw reply
* [PATCH v2 2/2] environment: move excludes_file into repo_config_values
From: Tian Yuchen @ 2026-06-26 7:50 UTC (permalink / raw)
To: git
Cc: cirnovskyv, Tian Yuchen, Christian Couder, Ayush Chandekar,
Olamide Caleb Bello
In-Reply-To: <20260626075037.532164-1-cat@malon.dev>
Continue the libification effort by moving the 'excludes_file' global
variable into 'struct repo_config_values'.
Since 'excludes_file' is a dynamically allocated string (char *), it
requires proper memory management. Introduce repo_config_values_clear()
to safely free the heap memory when repository instance is destroyed.
Note:
- 'if (repo != the_repository)' fallback logic is temporarily added
in both the getter and the clear function. This prevents calling
repo_config_values() on uninitialized submodules, which triggers BUG().
- 'attribute_file' is another string variable that was migrated
earlier. Its FREE_AND_NULL() call is also added to
repo_config_values_clear().
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Ayush Chandekar <ayu.chandekar@gmail.com>
Mentored-by: Olamide Caleb Bello <belkid98@gmail.com>
Signed-off-by: Tian Yuchen <cat@malon.dev>
---
environment.c | 31 +++++++++++++++++++++++++------
environment.h | 14 ++++++++++----
repository.c | 1 +
3 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/environment.c b/environment.c
index 8efcaeafa6..b8dfa3e213 100644
--- a/environment.c
+++ b/environment.c
@@ -57,7 +57,6 @@ enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
char *editor_program;
char *askpass_program;
-char *excludes_file;
enum auto_crlf auto_crlf = AUTO_CRLF_FALSE;
enum eol core_eol = EOL_UNSET;
int global_conv_flags_eol = CONV_EOL_RNDTRP_WARN;
@@ -136,9 +135,13 @@ int is_bare_repository(void)
const char *repo_excludes_file(struct repository *repo)
{
- if (!excludes_file)
- excludes_file = xdg_config_home("ignore");
- return excludes_file;
+ if (!repo || !repo->initialized || repo != the_repository)
+ return NULL;
+
+ if (!repo_config_values(repo)->excludes_file)
+ repo_config_values(repo)->excludes_file = xdg_config_home("ignore");
+
+ return repo_config_values(repo)->excludes_file;
}
int have_git_dir(void)
@@ -468,8 +471,8 @@ int git_default_core_config(const char *var, const char *value,
}
if (!strcmp(var, "core.excludesfile")) {
- FREE_AND_NULL(excludes_file);
- return git_config_pathname(&excludes_file, var, value);
+ FREE_AND_NULL(cfg->excludes_file);
+ return git_config_pathname(&cfg->excludes_file, var, value);
}
if (!strcmp(var, "core.whitespace")) {
@@ -722,6 +725,7 @@ int git_default_config(const char *var, const char *value,
void repo_config_values_init(struct repo_config_values *cfg)
{
cfg->attributes_file = NULL;
+ cfg->excludes_file = NULL;
cfg->apply_sparse_checkout = 0;
cfg->branch_track = BRANCH_TRACK_REMOTE;
cfg->trust_ctime = 1;
@@ -733,3 +737,18 @@ void repo_config_values_init(struct repo_config_values *cfg)
cfg->sparse_expect_files_outside_of_patterns = 0;
cfg->warn_on_object_refname_ambiguity = 1;
}
+
+void repo_config_values_clear(struct repository *repo)
+{
+ struct repo_config_values *cfg;
+
+ if (repo != the_repository)
+ return;
+
+ cfg = repo_config_values(repo);
+ if (!cfg)
+ return;
+
+ FREE_AND_NULL(cfg->attributes_file);
+ FREE_AND_NULL(cfg->excludes_file);
+}
diff --git a/environment.h b/environment.h
index 52d531e4ea..2e8352de7f 100644
--- a/environment.h
+++ b/environment.h
@@ -90,6 +90,7 @@ struct repository;
struct repo_config_values {
/* section "core" config values */
char *attributes_file;
+ char *excludes_file;
int apply_sparse_checkout;
int trust_ctime;
int check_stat;
@@ -133,13 +134,19 @@ int git_default_config(const char *, const char *,
int git_default_core_config(const char *var, const char *value,
const struct config_context *ctx, void *cb);
-/*
- * TODO: This still relies on the global state.
- */
const char *repo_excludes_file(struct repository *repo);
void repo_config_values_init(struct repo_config_values *cfg);
+/*
+ * Frees memory allocated for dynamically loaded configuration values
+ * inside `repo_config_values`.
+ *
+ * As dynamically allocated variables are migrated into this struct,
+ * their FREE_AND_NULL() calls should be appended here.
+ */
+void repo_config_values_clear(struct repository *repo);
+
/*
* TODO: All the below state either explicitly or implicitly relies on
* `the_repository`. We should eventually get rid of these and make the
@@ -213,7 +220,6 @@ extern char *git_log_output_encoding;
extern char *editor_program;
extern char *askpass_program;
-extern char *excludes_file;
/*
* The character that begins a commented line in user-editable file
diff --git a/repository.c b/repository.c
index 187dd471c4..b31f1b7852 100644
--- a/repository.c
+++ b/repository.c
@@ -388,6 +388,7 @@ void repo_clear(struct repository *repo)
FREE_AND_NULL(repo->parsed_objects);
repo_settings_clear(repo);
+ repo_config_values_clear(repo);
if (repo->config) {
git_configset_clear(repo->config);
--
2.43.0
^ permalink raw reply related
* [PATCH v2 1/2] dir: encapsulate excludes_file lazy-load
From: Tian Yuchen @ 2026-06-26 7:50 UTC (permalink / raw)
To: git
Cc: cirnovskyv, Tian Yuchen, Christian Couder, Ayush Chandekar,
Olamide Caleb Bello
In-Reply-To: <20260626075037.532164-1-cat@malon.dev>
The global variable 'excludes_file' is used to track the path to the
global ignore file, 'core.excludesfile'. If this variable is NULL,
setup_standard_excludes() in dir.c forcefully evaluates and assigns
the XDG default path to it.
Introduce repo_excludes_file() as a getter to encapsulate this
lazy-loading logic. This prepares the variable to be safely moved
into 'struct repo_config_values' in the subsequent commit.
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Ayush Chandekar <ayu.chandekar@gmail.com>
Mentored-by: Olamide Caleb Bello <belkid98@gmail.com>
Signed-off-by: Tian Yuchen <cat@malon.dev>
---
dir.c | 4 ++--
environment.c | 7 +++++++
environment.h | 5 +++++
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/dir.c b/dir.c
index 7a73690fbc..4f87a52b3c 100644
--- a/dir.c
+++ b/dir.c
@@ -3481,11 +3481,11 @@ static GIT_PATH_FUNC(git_path_info_exclude, "info/exclude")
void setup_standard_excludes(struct dir_struct *dir)
{
+ const char *excludes_file = repo_excludes_file(the_repository);
+
dir->exclude_per_dir = ".gitignore";
/* core.excludesfile defaulting to $XDG_CONFIG_HOME/git/ignore */
- if (!excludes_file)
- excludes_file = xdg_config_home("ignore");
if (excludes_file && !access_or_warn(excludes_file, R_OK, 0))
add_patterns_from_file_1(dir, excludes_file,
dir->untracked ? &dir->internal.ss_excludes_file : NULL);
diff --git a/environment.c b/environment.c
index ba2c60103f..8efcaeafa6 100644
--- a/environment.c
+++ b/environment.c
@@ -134,6 +134,13 @@ int is_bare_repository(void)
return is_bare_repository_cfg && !repo_get_work_tree(the_repository);
}
+const char *repo_excludes_file(struct repository *repo)
+{
+ if (!excludes_file)
+ excludes_file = xdg_config_home("ignore");
+ return excludes_file;
+}
+
int have_git_dir(void)
{
return startup_info->have_repository
diff --git a/environment.h b/environment.h
index 6f18286955..52d531e4ea 100644
--- a/environment.h
+++ b/environment.h
@@ -133,6 +133,11 @@ int git_default_config(const char *, const char *,
int git_default_core_config(const char *var, const char *value,
const struct config_context *ctx, void *cb);
+/*
+ * TODO: This still relies on the global state.
+ */
+const char *repo_excludes_file(struct repository *repo);
+
void repo_config_values_init(struct repo_config_values *cfg);
/*
--
2.43.0
^ permalink raw reply related
* [PATCH v2 0/2] environment: move excludes_file into repo_config_values
From: Tian Yuchen @ 2026-06-26 7:50 UTC (permalink / raw)
To: git
Cc: cirnovskyv, Tian Yuchen, Christian Couder, Ayush Chandekar,
Olamide Caleb Bello
This series continues the libification effort by migrating the global
string variable 'excludes_file' into 'struct repo_config_values'. Since
this is a dynamically allocated variable, the migration requires proper
heap memory management.
The series is structured in two commits:
- Abstract the XDG fallback lazy-loading logic out of dir.c into a proper
getter.
- Move the variables into the struct and introduce 'repo_config_values_clear()'.
Note on Submodules: A temporary shield 'if (repo != the_repository)' is
included in both the getter and the clear function. This prevents
uninitialized submodules from triggering the BUG() assertion.
(Inspiration: [1])
Changes since V1:
- fix a typo in the second commit.
- initialize excludes_file to NULL in repo_config_values_init().
- call FREE_AND_NULL() to free attributes_file as well in
repo_config_values_clear().
Thanks!
Mentored-by: Christian Couder <christian.couder@gmail.com>
Mentored-by: Ayush Chandekar <ayu.chandekar@gmail.com>
Mentored-by: Olamide Caleb Bello <belkid98@gmail.com>
Signed-off-by: Tian Yuchen <cat@malon.dev>
[1] https://lore.kernel.org/git/c95a7730-7b14-4be0-a4e4-861b2f5430ea@gmail.com/
Tian Yuchen (2):
dir: encapsulate excludes_file lazy-load
environment: move excludes_file into repo_config_values
dir.c | 4 ++--
environment.c | 32 +++++++++++++++++++++++++++++---
environment.h | 13 ++++++++++++-
repository.c | 1 +
4 files changed, 44 insertions(+), 6 deletions(-)
--
2.43.0
^ permalink raw reply
* Re: [PATCH 05/11] reftable/block: fix OOB write with bogus inflated log size
From: Christian Couder @ 2026-06-26 7:48 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, oxsignal
In-Reply-To: <20260624-pks-reftable-hardening-v1-5-66e4ce87c6b9@pks.im>
On Wed, Jun 24, 2026 at 10:24 AM Patrick Steinhardt <ps@pks.im> wrote:
> diff --git a/t/unit-tests/u-reftable-block.c b/t/unit-tests/u-reftable-block.c
> index f4bded7d26..40274af5c0 100644
> --- a/t/unit-tests/u-reftable-block.c
> +++ b/t/unit-tests/u-reftable-block.c
> @@ -456,3 +456,47 @@ void test_reftable_block__iterator(void)
> block_writer_release(&writer);
> reftable_buf_release(&data);
> }
> +
> +void test_reftable_block__corrupt_log_block_size(void)
> +{
> + struct reftable_block_source source = { 0 };
> + struct block_writer writer = {
> + .last_key = REFTABLE_BUF_INIT,
> + };
> + struct reftable_record rec = {
> + .type = REFTABLE_BLOCK_TYPE_LOG,
> + .u.log = {
> + .refname = (char *) "refs/heads/main",
> + .update_index = 1,
> + .value_type = REFTABLE_LOG_UPDATE,
> + },
> + };
> + struct reftable_block block = { 0 };
> + struct reftable_buf data;
> +
> + data.len = 1024;
> + REFTABLE_CALLOC_ARRAY(data.buf, data.len);
> + cl_assert(data.buf != NULL);
> +
> + cl_must_pass(block_writer_init(&writer, REFTABLE_BLOCK_TYPE_LOG,
> + (uint8_t *) data.buf, data.len,
> + 0, hash_size(REFTABLE_HASH_SHA1)));
> + cl_must_pass(block_writer_add(&writer, &rec));
> + cl_assert(block_writer_finish(&writer) > 0);
It looks like some of the block writing code above could be simplified
using an helper function like:
int cl_reftable_write_block(struct reftable_buf *buf, uint8_t block_type,
size_t block_size, uint32_t header_off,
struct reftable_record *recs, size_t nrecs)
{
struct block_writer writer = {
.last_key = REFTABLE_BUF_INIT,
};
int block_end;
REFTABLE_CALLOC_ARRAY(buf->buf, block_size);
cl_assert(buf->buf != NULL);
buf->len = block_size;
cl_must_pass(block_writer_init(&writer, block_type, (uint8_t *) buf->buf,
block_size, header_off,
hash_size(REFTABLE_HASH_SHA1)));
for (size_t i = 0; i < nrecs; i++)
cl_must_pass(block_writer_add(&writer, &recs[i]));
block_end = block_writer_finish(&writer);
cl_assert(block_end > 0);
block_writer_release(&writer);
return block_end;
}
This function could be introduced by a preparatory commit in
t/unit-tests/lib-reftable.{c,h}. It would be kind of similar to the
existing cl_reftable_write_to_buf() helper in those files.
It looks like it could already simplify existing tests like:
- test_reftable_block__log_read_write
- test_reftable_block__obj_read_write
- test_reftable_block__ref_read_write
- test_reftable_block__iterator
and it could simplify the new tests introduced by other patches in this series:
- 06/11 reftable/block: fix OOB read with bogus block size
- 07/11 reftable/block: fix OOB read with bogus restart count
- 09/11 reftable/block: fix OOB read with bogus restart offset
> + /*
> + * Log blocks store their inflated size as a big-endian 24-bit integer
> + * right after the one-byte block type. Rewrite it to claim a size that
> + * is smaller than the block header.
> + */
> + reftable_put_be24((uint8_t *) data.buf + 1, 1);
> +
> + block_source_from_buf(&source, &data);
> + cl_assert_equal_i(reftable_block_init(&block, &source, 0, 0, data.len,
> + REFTABLE_HASH_SIZE_SHA1, REFTABLE_BLOCK_TYPE_LOG),
> + REFTABLE_FORMAT_ERROR);
> +
> + reftable_block_release(&block);
> + block_writer_release(&writer);
> + reftable_buf_release(&data);
> +}
Otherwise the series looks great to me.
Thanks.
^ permalink raw reply
* Re: [PATCH v3 4/4] connected: search promisor objects generically
From: Christian Couder @ 2026-06-26 6:55 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Patrick Steinhardt, git, Christian Couder
In-Reply-To: <xmqq4iiqfk0l.fsf@gitster.g>
On Thu, Jun 25, 2026 at 10:22 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Patrick Steinhardt <ps@pks.im> writes:
>
> > When performing connectivity checks we have to figure out whether any of
> > the new objects are promisor objects, as we cannot assume full
> > connectivity if so.
> >
> > This check is performed by iterating through all packfiles in the
> > repository and searching each of them for the given object. Of course,
> > this mechanism is quite specific to implementation details of the object
> > database, as we assume that it uses packfiles in the first place.
> >
> > Refactor the logic so that we instead use `odb_for_each_object_ext()`
> > with an object prefix filter and the `ODB_FOR_EACH_OBJECT_PROMISOR_ONLY`
> > flag. This will yield all objects that have the exact object name and
> > that are part of a promisor pack in a generic way.
> >
> > Add a test to verify that we indeed use the optimization.
>
> OK. The new test is a good way to catch the issue we noticed in the
> previous round, I guess. Looking good.
Yeah, it looks ready to me too.
Thanks.
^ permalink raw reply
* [PATCH v5 3/3] replay: offer an option to linearize the commit topology
From: Toon Claes @ 2026-06-26 5:48 UTC (permalink / raw)
To: git; +Cc: Elijah Newren, Johannes Schindelin, Toon Claes,
Johannes Schindelin
In-Reply-To: <20260626-toon-git-replay-drop-merges-v5-0-5e120738b9d0@iotcl.com>
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
One of the stated goals of git-replay(1) is to allow implementing the
git-rebase(1) functionality on the server side.
The default mode of git-rebase(1) is to act as if `--no-rebase-merges`
was given. This mode drops merge commits instead of replaying them, and
linearizes the commit history into a sequence of the
regular (single-parent) commits.
Add option `--linearize` to git-replay(1) to do the same.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Toon Claes <toon@iotcl.com>
---
Documentation/git-replay.adoc | 8 ++++-
builtin/replay.c | 6 +++-
replay.c | 50 ++++++++++++++++----------
replay.h | 5 +++
t/t3650-replay-basics.sh | 84 ++++++++++++++++++++++++++++++++++++++++++-
5 files changed, 132 insertions(+), 21 deletions(-)
diff --git a/Documentation/git-replay.adoc b/Documentation/git-replay.adoc
index a32f72aead..ef56ee0f1b 100644
--- a/Documentation/git-replay.adoc
+++ b/Documentation/git-replay.adoc
@@ -10,7 +10,7 @@ SYNOPSIS
--------
[verse]
(EXPERIMENTAL!) 'git replay' ([--contained] --onto=<newbase> | --advance=<branch> | --revert=<branch>)
- [--ref=<ref>] [--ref-action=<mode>] <revision-range>
+ [--ref=<ref>] [--ref-action=<mode>] [--linearize] <revision-range>
DESCRIPTION
-----------
@@ -88,6 +88,12 @@ incompatible with `--contained` (which is a modifier for `--onto` only).
+
The default mode can be configured via the `replay.refAction` configuration variable.
+--linearize::
+ In this mode, `git replay` imitates `git rebase --no-rebase-merges`,
+ i.e. it cherry-picks only non-merge commits, each one on top of the
+ previous one.
+ This option is incompatible with `--revert`.
+
<revision-range>::
Range of commits to replay; see "Specifying Ranges" in
linkgit:git-rev-parse[1]. In `--advance=<branch>` or
diff --git a/builtin/replay.c b/builtin/replay.c
index 39e3a86f6c..62962c73c7 100644
--- a/builtin/replay.c
+++ b/builtin/replay.c
@@ -85,7 +85,7 @@ int cmd_replay(int argc,
const char *const replay_usage[] = {
N_("(EXPERIMENTAL!) git replay "
"([--contained] --onto=<newbase> | --advance=<branch> | --revert=<branch>)\n"
- "[--ref=<ref>] [--ref-action=<mode>] <revision-range>"),
+ "[--ref=<ref>] [--ref-action=<mode>] [--linearize] <revision-range>"),
NULL
};
struct option replay_options[] = {
@@ -111,6 +111,8 @@ int cmd_replay(int argc,
N_("mode"),
N_("control ref update behavior (update|print)"),
PARSE_OPT_NONEG),
+ OPT_BOOL(0, "linearize", &opts.linearize,
+ N_("drop merge commits, replaying only non-merge commits")),
OPT_END()
};
@@ -132,6 +134,8 @@ int cmd_replay(int argc,
opts.contained, "--contained");
die_for_incompatible_opt2(!!opts.ref, "--ref",
!!opts.contained, "--contained");
+ die_for_incompatible_opt2(!!opts.revert, "--revert",
+ opts.linearize, "--linearize");
/* Parse ref action mode from command line or config */
ref_mode = get_ref_action_mode(repo, ref_action);
diff --git a/replay.c b/replay.c
index 86fba47fb9..d803e0312f 100644
--- a/replay.c
+++ b/replay.c
@@ -439,24 +439,38 @@ int replay_revisions(struct rev_info *revs,
while ((commit = get_revision(revs))) {
const struct name_decoration *decoration;
- /*
- * pick_regular_commit() looks up the parent of `commit` in
- * `replayed_commits` to determine the ancestor to replay onto.
- * The `default_base` parameter is used when no ancestor is found,
- * which happens for the first commit in the revision range.
- * When reverting, commits are replayed in reverse order, so the
- * lookup never succeeds, and we need to pass `last_commit`.
- */
- struct commit *base = onto;
- if (mode == REPLAY_MODE_REVERT)
- base = last_commit;
-
- if (commit->parents && commit->parents->next)
- die(_("replaying merge commits is not supported yet!"));
-
- last_commit = pick_regular_commit(revs->repo, commit, base,
- replayed_commits,
- &merge_opt, &result, mode, opts->empty);
+ if (commit->parents && commit->parents->next) {
+ if (!opts->linearize)
+ die(_("replaying merge commits is not supported yet!"));
+ /*
+ * Drop the merge commit: do not pick it, leave
+ * `last_commit` unchanged, and fall through to the
+ * rest of the loop. As a result:
+ * - the merge commit is mapped to `last_commit` in
+ * `replayed_commits`, this will become the parent for
+ * the child commits.
+ * - refs previously pointing to the merge commit are
+ * rewritten to point to the previous non-merge commit.
+ */
+ } else {
+ /*
+ * pick_regular_commit() looks up the parent of `commit` in
+ * `replayed_commits` to determine the ancestor to replay onto.
+ * The `default_base` parameter is used when no ancestor is found,
+ * which happens for the first commit in the revision range.
+ * When reverting, commits are replayed in reverse order, so the
+ * lookup never succeeds, and we need to pass `last_commit`.
+ */
+ struct commit *base = onto;
+ if (mode == REPLAY_MODE_REVERT)
+ base = last_commit;
+
+ last_commit = pick_regular_commit(revs->repo, commit, base,
+ replayed_commits,
+ &merge_opt, &result,
+ mode, opts->empty);
+ }
+
if (!last_commit)
break;
diff --git a/replay.h b/replay.h
index faf95c7459..64f42b6512 100644
--- a/replay.h
+++ b/replay.h
@@ -62,6 +62,11 @@ struct replay_revisions_options {
* Defaults to REPLAY_EMPTY_COMMIT_DROP.
*/
enum replay_empty_commit_action empty;
+
+ /*
+ * Whether to linearize the commits (i.e. drop merge commits).
+ */
+ int linearize;
};
/* This struct is used as an out-parameter by `replay_revisions()`. */
diff --git a/t/t3650-replay-basics.sh b/t/t3650-replay-basics.sh
index 3353bc4a4d..34c038eab9 100755
--- a/t/t3650-replay-basics.sh
+++ b/t/t3650-replay-basics.sh
@@ -52,8 +52,12 @@ test_expect_success 'setup' '
test_merge P O --no-ff &&
git switch main &&
+ git switch --orphan unrelated &&
+ test_commit unrelated-root &&
+
git switch -c conflict B &&
- test_commit C.conflict C.t conflict
+ test_commit C.conflict C.t conflict &&
+ git branch -D unrelated
'
test_expect_success 'setup bare' '
@@ -97,6 +101,12 @@ test_expect_success '--advance and --contained cannot be used together' '
test_grep "cannot be used together" actual
'
+test_expect_success '--revert and --linearize cannot be used together' '
+ test_must_fail git replay --revert=main --linearize \
+ topic1..topic2 2>actual &&
+ test_grep "cannot be used together" actual
+'
+
test_expect_success 'cannot advance target ... ordering would be ill-defined' '
echo "fatal: ${SQ}--advance${SQ} cannot be used with multiple revision ranges because the ordering would be ill-defined" >expect &&
test_must_fail git replay --advance=main main topic1 topic2 2>actual &&
@@ -565,4 +575,76 @@ test_expect_success '--onto with --ref rejects multiple revision ranges' '
test_grep "cannot be used with multiple revision ranges" err
'
+test_expect_success 'replay to rebase merge commit with --linearize' '
+ git replay --ref-action=print --linearize \
+ --onto main I..topic-with-merge >result &&
+
+ test_line_count = 1 result &&
+
+ git log --format=%s $(cut -f 3 -d " " result) >actual &&
+ test_write_lines O N J M L B A >expect &&
+ test_cmp expect actual
+'
+
+test_expect_success 'replay to rebase merge commit with --linearize down to the root commit' '
+ git replay --ref-action=print --linearize \
+ --onto unrelated-root topic-with-merge >result &&
+
+ test_line_count = 1 result &&
+
+ git log --format=%s $(cut -f 3 -d " " result) >actual &&
+ test_write_lines O N J I B A unrelated-root >expect &&
+ test_cmp expect actual
+'
+
+test_expect_success 'replay to cherry-pick merge commit with --linearize' '
+ git replay --ref-action=print --linearize \
+ --advance main I..topic-with-merge >result &&
+
+ test_line_count = 1 result &&
+
+ git log --format=%s $(cut -f 3 -d " " result) >actual &&
+ test_write_lines O N J M L B A >expect &&
+ test_cmp expect actual &&
+
+ printf "update refs/heads/main " >expect &&
+ printf "%s " $(cut -f 3 -d " " result) >>expect &&
+ git rev-parse main >>expect &&
+ test_cmp expect result
+'
+
+test_expect_success 'replay --linearize produces the same patches' '
+ git replay --ref-action=print --linearize \
+ --onto main I..topic-with-merge >result &&
+
+ test_line_count = 1 result &&
+ tip=$(cut -f 3 -d " " result) &&
+
+ # range-diff does not care about the dropped merge,
+ # so the original commits (I..topic-with-merge)
+ # and the replayed chain (main..tip) must produce identical patches.
+ git range-diff I..topic-with-merge main..$tip >out &&
+ test_file_not_empty out &&
+ test_grep ! -v "=" out &&
+
+ git log --oneline main..$tip >out &&
+ test_line_count = 3 out
+'
+
+test_expect_success 'replay with --linearize to rebase multiple divergent branches' '
+ git replay --ref-action=print --linearize \
+ --onto main ^B topic2 topic-with-merge >result &&
+
+ test_line_count = 2 result &&
+ cut -f 3 -d " " result >new-branch-tips &&
+
+ git log --format=%s $(head -n 1 new-branch-tips) >actual &&
+ test_write_lines E D C M L B A >expect &&
+ test_cmp expect actual &&
+
+ git log --format=%s $(tail -n 1 new-branch-tips) >actual &&
+ test_write_lines O N J I M L B A >expect &&
+ test_cmp expect actual
+'
+
test_done
--
2.53.0.1323.g189a785ab5
^ permalink raw reply related
* [PATCH v5 2/3] replay: better explain how pick_regular_commit() picks a base
From: Toon Claes @ 2026-06-26 5:48 UTC (permalink / raw)
To: git; +Cc: Elijah Newren, Toon Claes
In-Reply-To: <20260626-toon-git-replay-drop-merges-v5-0-5e120738b9d0@iotcl.com>
The function pick_regular_commit() will replay the `pickme` commit. To
determine the ancestor where to replay this commit on, it takes the
parent of the commit and looks up its replayed result in
`replayed_commits`. If no ancestor is found, the `onto` parameter is
used as fallback.
The name `onto` is rather confusing, so rename it to `default_base`. And
while at it, shuffle the function parameters so `struct commit`
parameters are immediate siblings.
When in mode REPLAY_MODE_REVERT, the fallback `default_base` will always
be used. This happens because commits are replayed in reverse order, so
looking up the `pickme`'s parent in `replayed_commits` will always
return empty. And to make these commits stack on top of each other, we
need to pass in `last_commit`.
Signed-off-by: Toon Claes <toon@iotcl.com>
---
replay.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/replay.c b/replay.c
index 7bde1c7e93..86fba47fb9 100644
--- a/replay.c
+++ b/replay.c
@@ -280,8 +280,8 @@ static void put_mapped_commit(kh_oid_map_t *replayed_commits,
static struct commit *pick_regular_commit(struct repository *repo,
struct commit *pickme,
+ struct commit *default_base,
kh_oid_map_t *replayed_commits,
- struct commit *onto,
struct merge_options *merge_opt,
struct merge_result *result,
enum replay_mode mode,
@@ -298,7 +298,7 @@ static struct commit *pick_regular_commit(struct repository *repo,
base_tree = lookup_tree(repo, repo->hash_algo->empty_tree);
}
- replayed_base = get_mapped_commit(replayed_commits, base, onto);
+ replayed_base = get_mapped_commit(replayed_commits, base, default_base);
replayed_base_tree = repo_get_commit_tree(repo, replayed_base);
pickme_tree = repo_get_commit_tree(repo, pickme);
@@ -439,11 +439,23 @@ int replay_revisions(struct rev_info *revs,
while ((commit = get_revision(revs))) {
const struct name_decoration *decoration;
+ /*
+ * pick_regular_commit() looks up the parent of `commit` in
+ * `replayed_commits` to determine the ancestor to replay onto.
+ * The `default_base` parameter is used when no ancestor is found,
+ * which happens for the first commit in the revision range.
+ * When reverting, commits are replayed in reverse order, so the
+ * lookup never succeeds, and we need to pass `last_commit`.
+ */
+ struct commit *base = onto;
+ if (mode == REPLAY_MODE_REVERT)
+ base = last_commit;
+
if (commit->parents && commit->parents->next)
die(_("replaying merge commits is not supported yet!"));
- last_commit = pick_regular_commit(revs->repo, commit, replayed_commits,
- mode == REPLAY_MODE_REVERT ? last_commit : onto,
+ last_commit = pick_regular_commit(revs->repo, commit, base,
+ replayed_commits,
&merge_opt, &result, mode, opts->empty);
if (!last_commit)
break;
--
2.53.0.1323.g189a785ab5
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox