Git development
 help / color / mirror / Atom feed
* [PATCH 2/3] revision: introduce rev_walk_mode to clarify get_revision_1()
From: Kristofer Karlsson via GitGitGadget @ 2026-05-27 15:50 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Kristofer Karlsson
In-Reply-To: <pull.2127.git.1779897003.gitgitgadget@gmail.com>

From: Kristofer Karlsson <krka@spotify.com>

get_revision_1() dispatches to different walk strategies based on a
combination of rev_info flags: reflog_info, topo_walk_info, and
limited.  These conditions are checked in multiple places within
the function -- once to select the next commit, and again to decide
how to expand parents -- and the two chains must stay in sync.

Extract the mode selection into a rev_walk_mode enum and a small
get_walk_mode() helper, resolved once at the top of get_revision_1().
Both dispatch sites now switch on the same mode variable, making it
obvious that they agree and easier to verify that all modes are
handled.

No functional change.

Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
 revision.c | 62 ++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/revision.c b/revision.c
index e1970b9c5d..9d0fc696d0 100644
--- a/revision.c
+++ b/revision.c
@@ -4327,22 +4327,48 @@ static void track_linear(struct rev_info *revs, struct commit *commit)
 	revs->previous_parents = commit_list_copy(commit->parents);
 }
 
+enum rev_walk_mode {
+	REV_WALK_REFLOG,
+	REV_WALK_TOPO,
+	REV_WALK_LIMITED,
+	REV_WALK_STREAMING,
+};
+
+static enum rev_walk_mode get_walk_mode(struct rev_info *revs)
+{
+	if (revs->reflog_info)
+		return REV_WALK_REFLOG;
+	if (revs->topo_walk_info)
+		return REV_WALK_TOPO;
+	if (revs->limited)
+		return REV_WALK_LIMITED;
+	return REV_WALK_STREAMING;
+}
+
 static struct commit *get_revision_1(struct rev_info *revs)
 {
+	enum rev_walk_mode mode = get_walk_mode(revs);
+
 	while (1) {
 		struct commit *commit;
 
-		if (revs->reflog_info)
+		switch (mode) {
+		case REV_WALK_REFLOG:
 			commit = next_reflog_entry(revs->reflog_info);
-		else if (revs->topo_walk_info)
+			break;
+		case REV_WALK_TOPO:
 			commit = next_topo_commit(revs);
-		else
+			break;
+		case REV_WALK_LIMITED:
+		case REV_WALK_STREAMING:
 			commit = pop_commit(&revs->commits);
+			break;
+		}
 
 		if (!commit)
 			return NULL;
 
-		if (revs->reflog_info)
+		if (mode == REV_WALK_REFLOG)
 			commit->object.flags &= ~(ADDED | SEEN | SHOWN);
 
 		/*
@@ -4350,20 +4376,28 @@ static struct commit *get_revision_1(struct rev_info *revs)
 		 * the parents here. We also need to do the date-based limiting
 		 * that we'd otherwise have done in limit_list().
 		 */
-		if (!revs->limited) {
-			if (revs->max_age != -1 &&
-			    comparison_date(revs, commit) < revs->max_age)
-				continue;
+		if (mode != REV_WALK_LIMITED &&
+		    revs->max_age != -1 &&
+		    comparison_date(revs, commit) < revs->max_age)
+			continue;
 
-			if (revs->reflog_info)
-				try_to_simplify_commit(revs, commit);
-			else if (revs->topo_walk_info)
-				expand_topo_walk(revs, commit);
-			else if (process_parents(revs, commit, &revs->commits, NULL) < 0) {
+		switch (mode) {
+		case REV_WALK_REFLOG:
+			try_to_simplify_commit(revs, commit);
+			break;
+		case REV_WALK_TOPO:
+			expand_topo_walk(revs, commit);
+			break;
+		case REV_WALK_STREAMING:
+			if (process_parents(revs, commit,
+					    &revs->commits, NULL) < 0) {
 				if (!revs->ignore_missing_links)
 					die("Failed to traverse parents of commit %s",
-						oid_to_hex(&commit->object.oid));
+					    oid_to_hex(&commit->object.oid));
 			}
+			break;
+		case REV_WALK_LIMITED:
+			break;
 		}
 
 		switch (simplify_commit(revs, commit)) {
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH 3/3] revision: use priority queue for non-limited streaming walks
From: Kristofer Karlsson via GitGitGadget @ 2026-05-27 15:50 UTC (permalink / raw)
  To: git; +Cc: Kristofer Karlsson, Kristofer Karlsson
In-Reply-To: <pull.2127.git.1779897003.gitgitgadget@gmail.com>

From: Kristofer Karlsson <krka@spotify.com>

The streaming (non-limited) walk in get_revision_1() inserts newly
discovered parent commits into a date-sorted queue via
commit_list_insert_by_date(), which scans the linked list to find the
insertion point -- O(w) per insert, where w is the width of the active
walk frontier.  Replace this with an O(log w) priority queue.

Add a commit_queue field to rev_info alongside the existing commits
linked list.  The two representations are mutually exclusive: setup
and external callers that need list access use the linked list, then
get_revision_1() lazily drains it into the priority queue on first
call.  Add a REV_WALK_NO_WALK enum value to distinguish the no_walk
case (which still uses the commit list) from the streaming case.

The conversion function rev_info_commit_list_to_queue() is public so
callers that know they will iterate can convert early.

Combined with the limit_list() priority queue change already in
master, this eliminates all O(w) sorted linked-list insertion from
the revision walk machinery.

Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
 commit.c   | 13 -------------
 commit.h   |  2 --
 revision.c | 55 +++++++++++++++++++++++++++++-------------------------
 revision.h | 12 +++++++++++-
 4 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/commit.c b/commit.c
index e3e7352e69..5112c7b2af 100644
--- a/commit.c
+++ b/commit.c
@@ -729,19 +729,6 @@ void commit_list_free(struct commit_list *list)
 		pop_commit(&list);
 }
 
-struct commit_list * commit_list_insert_by_date(struct commit *item, struct commit_list **list)
-{
-	struct commit_list **pp = list;
-	struct commit_list *p;
-	while ((p = *pp) != NULL) {
-		if (p->item->date < item->date) {
-			break;
-		}
-		pp = &p->next;
-	}
-	return commit_list_insert(item, pp);
-}
-
 static int commit_list_compare_by_date(const struct commit_list *a,
 				       const struct commit_list *b)
 {
diff --git a/commit.h b/commit.h
index 58150045af..385492fbb1 100644
--- a/commit.h
+++ b/commit.h
@@ -191,8 +191,6 @@ int commit_list_contains(struct commit *item,
 struct commit_list **commit_list_append(struct commit *commit,
 					struct commit_list **next);
 unsigned commit_list_count(const struct commit_list *l);
-struct commit_list *commit_list_insert_by_date(struct commit *item,
-				    struct commit_list **list);
 void commit_list_sort_by_date(struct commit_list **list);
 
 /* Shallow copy of the input list */
diff --git a/revision.c b/revision.c
index 9d0fc696d0..4bb3b16e43 100644
--- a/revision.c
+++ b/revision.c
@@ -1116,7 +1116,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
 }
 
 static int process_parents(struct rev_info *revs, struct commit *commit,
-			   struct commit_list **list, struct prio_queue *queue)
+			   struct prio_queue *queue)
 {
 	struct commit_list *parent = commit->parents;
 	unsigned pass_flags;
@@ -1158,8 +1158,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
 			if (p->object.flags & SEEN)
 				continue;
 			p->object.flags |= (SEEN | NOT_USER_GIVEN);
-			if (list)
-				commit_list_insert_by_date(p, list);
 			if (queue)
 				prio_queue_put(queue, p);
 			if (revs->exclude_first_parent_only)
@@ -1207,8 +1205,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
 		p->object.flags |= pass_flags | CHILD_VISITED;
 		if (!(p->object.flags & SEEN)) {
 			p->object.flags |= (SEEN | NOT_USER_GIVEN);
-			if (list)
-				commit_list_insert_by_date(p, list);
 			if (queue)
 				prio_queue_put(queue, p);
 		}
@@ -1470,7 +1466,7 @@ static int limit_list(struct rev_info *revs)
 
 		if (revs->max_age != -1 && (commit->date < revs->max_age))
 			obj->flags |= UNINTERESTING;
-		if (process_parents(revs, commit, NULL, &queue) < 0) {
+		if (process_parents(revs, commit, &queue) < 0) {
 			clear_prio_queue(&queue);
 			return -1;
 		}
@@ -3257,6 +3253,7 @@ static void free_void_commit_list(void *list)
 void release_revisions(struct rev_info *revs)
 {
 	commit_list_free(revs->commits);
+	clear_prio_queue(&revs->commit_queue);
 	commit_list_free(revs->ancestry_path_bottoms);
 	release_display_notes(&revs->notes_opt);
 	object_array_clear(&revs->pending);
@@ -3726,7 +3723,7 @@ static void explore_walk_step(struct rev_info *revs)
 	if (revs->max_age != -1 && (c->date < revs->max_age))
 		c->object.flags |= UNINTERESTING;
 
-	if (process_parents(revs, c, NULL, NULL) < 0)
+	if (process_parents(revs, c, NULL) < 0)
 		return;
 
 	if (c->object.flags & UNINTERESTING)
@@ -3902,7 +3899,7 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
 {
 	struct commit_list *p;
 	struct topo_walk_info *info = revs->topo_walk_info;
-	if (process_parents(revs, commit, NULL, NULL) < 0) {
+	if (process_parents(revs, commit, NULL) < 0) {
 		if (!revs->ignore_missing_links)
 			die("Failed to traverse parents of commit %s",
 			    oid_to_hex(&commit->object.oid));
@@ -3938,6 +3935,13 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
 	}
 }
 
+void rev_info_commit_list_to_queue(struct rev_info *revs)
+{
+	while (revs->commits)
+		prio_queue_put(&revs->commit_queue, pop_commit(&revs->commits));
+}
+
+
 int prepare_revision_walk(struct rev_info *revs)
 {
 	int i;
@@ -4006,7 +4010,7 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
 	for (;;) {
 		struct commit *p = *pp;
 		if (!revs->limited)
-			if (process_parents(revs, p, NULL, queue) < 0)
+			if (process_parents(revs, p, queue) < 0)
 				return rewrite_one_error;
 		if (p->object.flags & UNINTERESTING)
 			return rewrite_one_ok;
@@ -4020,27 +4024,18 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
 	}
 }
 
-static void merge_queue_into_list(struct prio_queue *q, struct commit_list **list)
+static void merge_queue_into_prio_queue(struct prio_queue *from,
+					struct prio_queue *to)
 {
-	while (q->nr) {
-		struct commit *item = prio_queue_peek(q);
-		struct commit_list *p = *list;
-
-		if (p && p->item->date >= item->date)
-			list = &p->next;
-		else {
-			p = commit_list_insert(item, list);
-			list = &p->next; /* skip newly added item */
-			prio_queue_get(q); /* pop item */
-		}
-	}
+	while (from->nr)
+		prio_queue_put(to, prio_queue_get(from));
 }
 
 static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp)
 {
 	struct prio_queue queue = { compare_commits_by_commit_date };
 	enum rewrite_result ret = rewrite_one_1(revs, pp, &queue);
-	merge_queue_into_list(&queue, &revs->commits);
+	merge_queue_into_prio_queue(&queue, &revs->commit_queue);
 	clear_prio_queue(&queue);
 	return ret;
 }
@@ -4331,6 +4326,7 @@ enum rev_walk_mode {
 	REV_WALK_REFLOG,
 	REV_WALK_TOPO,
 	REV_WALK_LIMITED,
+	REV_WALK_NO_WALK,
 	REV_WALK_STREAMING,
 };
 
@@ -4342,6 +4338,8 @@ static enum rev_walk_mode get_walk_mode(struct rev_info *revs)
 		return REV_WALK_TOPO;
 	if (revs->limited)
 		return REV_WALK_LIMITED;
+	if (revs->no_walk)
+		return REV_WALK_NO_WALK;
 	return REV_WALK_STREAMING;
 }
 
@@ -4349,6 +4347,9 @@ static struct commit *get_revision_1(struct rev_info *revs)
 {
 	enum rev_walk_mode mode = get_walk_mode(revs);
 
+	if (mode == REV_WALK_STREAMING && revs->commits)
+		rev_info_commit_list_to_queue(revs);
+
 	while (1) {
 		struct commit *commit;
 
@@ -4360,9 +4361,12 @@ static struct commit *get_revision_1(struct rev_info *revs)
 			commit = next_topo_commit(revs);
 			break;
 		case REV_WALK_LIMITED:
-		case REV_WALK_STREAMING:
+		case REV_WALK_NO_WALK:
 			commit = pop_commit(&revs->commits);
 			break;
+		case REV_WALK_STREAMING:
+			commit = prio_queue_get(&revs->commit_queue);
+			break;
 		}
 
 		if (!commit)
@@ -4390,12 +4394,13 @@ static struct commit *get_revision_1(struct rev_info *revs)
 			break;
 		case REV_WALK_STREAMING:
 			if (process_parents(revs, commit,
-					    &revs->commits, NULL) < 0) {
+					    &revs->commit_queue) < 0) {
 				if (!revs->ignore_missing_links)
 					die("Failed to traverse parents of commit %s",
 					    oid_to_hex(&commit->object.oid));
 			}
 			break;
+		case REV_WALK_NO_WALK:
 		case REV_WALK_LIMITED:
 			break;
 		}
diff --git a/revision.h b/revision.h
index 584f1338b5..04982a3d47 100644
--- a/revision.h
+++ b/revision.h
@@ -12,6 +12,7 @@
 #include "decorate.h"
 #include "ident.h"
 #include "list-objects-filter-options.h"
+#include "prio-queue.h"
 #include "strvec.h"
 
 /**
@@ -122,8 +123,14 @@ struct oidset;
 struct topo_walk_info;
 
 struct rev_info {
-	/* Starting list */
+	/*
+	 * Work queue of commits, stored as either a linked list or a
+	 * priority queue, but never both at the same time.
+	 * rev_info_commit_list_to_queue() converts list to queue.
+	 */
 	struct commit_list *commits;
+	struct prio_queue commit_queue;
+
 	struct object_array pending;
 	struct repository *repo;
 
@@ -400,6 +407,7 @@ struct rev_info {
  * uninitialized.
  */
 #define REV_INFO_INIT { \
+	.commit_queue = { .compare = compare_commits_by_commit_date }, \
 	.abbrev = DEFAULT_ABBREV, \
 	.simplify_history = 1, \
 	.pruning.flags.recursive = 1, \
@@ -478,6 +486,8 @@ void reset_revision_walk(void);
  */
 int prepare_revision_walk(struct rev_info *revs);
 
+/* Drain the commits linked list into the priority queue. */
+void rev_info_commit_list_to_queue(struct rev_info *revs);
 /**
  * Takes a pointer to a `rev_info` structure and iterates over it, returning a
  * `struct commit *` each time you call it. The end of the revision list is
-- 
gitgitgadget

^ permalink raw reply related

* [PATCH] commit-reach: stop sorting in paint_down_to_common()
From: René Scharfe @ 2026-05-27 15:52 UTC (permalink / raw)
  To: Git List

None of the three callers of paint_down_to_common() care about the order
of its result list: merge_bases_many() sorts it again after removing
stale items; remove_redundant_no_gen() and repo_in_merge_bases_many()
throw the list away without even looking at it.  So drop the unnecessary
commit_list_sort_by_date() call.

Signed-off-by: René Scharfe <l.s.r@web.de>
---
 commit-reach.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/commit-reach.c b/commit-reach.c
index 5a52be90a6..056a7ed8d8 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -137,7 +137,6 @@ static int paint_down_to_common(struct repository *r,
 	}
 
 	clear_prio_queue(&queue);
-	commit_list_sort_by_date(result);
 	return 0;
 }
 
-- 
2.54.0

^ permalink raw reply related

* What's cooking in git.git (May 2026, #08)
From: Junio C Hamano @ 2026-05-27 16:06 UTC (permalink / raw)
  To: git

Here are the topics that have been cooking in my tree.  Commits
prefixed with '+' are in 'next' (being in 'next' is a sign that a
topic is stable enough to be used and is a candidate to be in a
future release).  Commits prefixed with '-' are only in 'seen', and
aren't considered "accepted" at all; they may be annotated with a URL
to a message that raises issues, but such annotations are by no means
exhaustive.
A topic without enough support may be discarded after a long period
of no activity (of course they can be resubmitted when new interests
arise).

Copies of the source code to Git live in many repositories, and the
following is a list of the ones I push into or their mirrors.  Some
repositories have only a subset of branches.

With maint, master, next, seen, todo:

	git://git.kernel.org/pub/scm/git/git.git/
	git://repo.or.cz/alt-git.git/
	https://kernel.googlesource.com/pub/scm/git/git/
	https://github.com/git/git/
	https://gitlab.com/git-scm/git/

With all the integration branches and topics broken out:

	https://github.com/gitster/git/

Even though the preformatted documentation in HTML and man format
are not sources, they are published in these repositories for
convenience (replace "htmldocs" with "manpages" for the manual
pages):

	git://git.kernel.org/pub/scm/git/git-htmldocs.git/
	https://github.com/gitster/git-htmldocs.git/

Release tarballs are available at:

	https://www.kernel.org/pub/software/scm/git/

--------------------------------------------------
[Graduated to 'master']

* ds/fetch-negotiation-options (2026-05-19) 8 commits
  (merged to 'next' on 2026-05-21 at ff57fd9c97)
 + send-pack: pass negotiation config in push
 + remote: add remote.*.negotiationInclude config
 + fetch: add --negotiation-include option for negotiation
 + negotiator: add have_sent() interface
 + remote: add remote.*.negotiationRestrict config
 + transport: rename negotiation_tips
 + fetch: add --negotiation-restrict option
 + t5516: fix test order flakiness

 The negotiation tip options in "git fetch" have been reworked to
 allow requiring certain refs to be sent as "have" lines, and to
 restrict negotiation to a specific set of refs.
 source: <pull.2085.v6.git.1779207896.gitgitgadget@gmail.com>


* en/batch-prefetch (2026-05-14) 4 commits
  (merged to 'next' on 2026-05-20 at 722acf81c8)
 + grep: prefetch necessary blobs
 + builtin/log: prefetch necessary blobs for `git cherry`
 + patch-ids.h: add missing trailing parenthesis in documentation comment
 + promisor-remote: document caller filtering contract

 In a lazy clone, "git cherry" and "git grep" often fetch necessary
 blob objects one by one from promisor remotes.  It has been corrected
 to collect necessary object names and fetch them in bulk to gain
 reasonable performance.
 cf. <0da4f159-8d4b-49e2-93c1-25aa0bf69371@gmail.com>
 source: <pull.2089.v3.git.1778775928.gitgitgadget@gmail.com>


* jk/sq-dequote-cleanup (2026-05-18) 3 commits
  (merged to 'next' on 2026-05-21 at fbedf2daea)
 + quote: simplify internals of dequoting
 + quote: drop sq_dequote_to_argv()
 + quote.h: bump strvec forward declaration to the top

 Code simplification.
 source: <20260519011837.GA1615637@coredump.intra.peff.net>


* jt/odb-transaction-write (2026-05-14) 7 commits
  (merged to 'next' on 2026-05-21 at 61108abe4d)
 + odb/transaction: make `write_object_stream()` pluggable
 + object-file: generalize packfile writes to use odb_write_stream
 + object-file: avoid fd seekback by checking object size upfront
 + object-file: remove flags from transaction packfile writes
 + odb: update `struct odb_write_stream` read() callback
 + odb/transaction: use pluggable `begin_transaction()`
 + odb: split `struct odb_transaction` into separate header
 (this branch is used by ps/odb-in-memory and ps/odb-source-loose.)

 ODB transaction interface is being reworked to explicitly handle
 object writes.
 source: <20260514183740.1505171-1-jltobler@gmail.com>


* kk/limit-list-optim (2026-05-14) 1 commit
  (merged to 'next' on 2026-05-19 at f17450dd1b)
 + revision: use priority queue in limit_list()

 The limit_list() function, which is one of the core parts of the
 revision traversal infrastructure, has been optimized by replacing
 its use of a linear list with a priority queue.
 source: <pull.2114.git.1778777491939.gitgitgadget@gmail.com>


* kk/merge-octopus-optim (2026-05-11) 1 commit
  (merged to 'next' on 2026-05-20 at afe427dc66)
 + merge: use repo_in_merge_bases for octopus up-to-date check

 The logic to determine that branches in an octopus merge are
 independent has been optimized.
 cf. <c5b333f1-0db6-4aec-a369-6503cb924e7f@gmail.com>
 source: <pull.2110.git.1778566286543.gitgitgadget@gmail.com>


* kn/refs-fsck-skip-lock-files (2026-05-17) 1 commit
  (merged to 'next' on 2026-05-21 at 91e30e3543)
 + refs/files: skip lock files during consistency checks

 The consistency checks for the files reference backend have been updated
 to skip lock files earlier, avoiding unnecessary parsing of
 intermediate files.
 source: <20260517-refs-fsck-skip-lock-files-v3-1-b24dfd673c7e@gmail.com>


* pb/doc-diff-format-updates (2026-05-15) 3 commits
  (merged to 'next' on 2026-05-20 at fe8d31e9f9)
 + diff-format.adoc: mode and hash are 0* for unmerged paths from index only
 + diff-format.adoc: 'git diff-files' prints two lines for unmerged files
 + diff-format.adoc: remove mention of diff-tree specific output

 Doc updates.
 source: <pull.2304.git.git.1778860091.gitgitgadget@gmail.com>


* ps/odb-in-memory (2026-04-10) 18 commits
  (merged to 'next' on 2026-05-21 at c8709aa17f)
 + t/unit-tests: add tests for the in-memory object source
 + odb: generic in-memory source
 + odb/source-inmemory: stub out remaining functions
 + odb/source-inmemory: implement `freshen_object()` callback
 + odb/source-inmemory: implement `count_objects()` callback
 + odb/source-inmemory: implement `find_abbrev_len()` callback
 + odb/source-inmemory: implement `for_each_object()` callback
 + odb/source-inmemory: convert to use oidtree
 + oidtree: add ability to store data
 + cbtree: allow using arbitrary wrapper structures for nodes
 + odb/source-inmemory: implement `write_object_stream()` callback
 + odb/source-inmemory: implement `write_object()` callback
 + odb/source-inmemory: implement `read_object_stream()` callback
 + odb/source-inmemory: implement `read_object_info()` callback
 + odb: fix unnecessary call to `find_cached_object()`
 + odb/source-inmemory: implement `free()` callback
 + odb: introduce "in-memory" source
 + Merge branch 'jt/odb-transaction-write' into ps/odb-in-memory
 (this branch is used by ps/odb-source-loose; uses jt/odb-transaction-write.)

 Add a new odb "in-memory" source that is meant to only hold
 tentative objects (like the virtual blob object that represents the
 working tree file used by "git blame").
 source: <20260410-b4-pks-odb-source-inmemory-v3-0-22fd0fad58fe@pks.im>


* ps/setup-wo-the-repository (2026-05-19) 18 commits
  (merged to 'next' on 2026-05-21 at d8fb5a7b3e)
 + setup: stop using `the_repository` in `init_db()`
 + setup: stop using `the_repository` in `create_reference_database()`
 + setup: stop using `the_repository` in `initialize_repository_version()`
 + setup: stop using `the_repository` in `check_repository_format()`
 + setup: stop using `the_repository` in `upgrade_repository_format()`
 + setup: stop using `the_repository` in `setup_git_directory()`
 + setup: stop using `the_repository` in `setup_git_directory_gently()`
 + setup: stop using `the_repository` in `setup_git_env()`
 + setup: stop using `the_repository` in `set_git_work_tree()`
 + setup: stop using `the_repository` in `setup_work_tree()`
 + setup: stop using `the_repository` in `enter_repo()`
 + setup: stop using `the_repository` in `verify_non_filename()`
 + setup: stop using `the_repository` in `verify_filename()`
 + setup: stop using `the_repository` in `path_inside_repo()`
 + setup: stop using `the_repository` in `prefix_path()`
 + setup: stop using `the_repository` in `is_inside_work_tree()`
 + setup: stop using `the_repository` in `is_inside_git_dir()`
 + setup: replace use of `the_repository` in static functions
 (this branch is used by ps/setup-centralize-odb-creation.)

 Many uses of the_repository have been updated to use a more
 appropriate struct repository instance in the setup.c codepath.
 source: <20260519-pks-setup-wo-the-repository-v3-0-a00d8ea8b07f@pks.im>


* ps/t3903-cover-stash-include-untracked (2026-05-16) 1 commit
  (merged to 'next' on 2026-05-20 at f1e7ac1cbd)
 + stash: add coverage for show --include-untracked

 Test coverage has been added for "git stash show --include-untracked".
 source: <20260516183347.4323-2-pushkarkumarsingh1970@gmail.com>


* rs/trailer-fold-optim (2026-05-15) 1 commit
  (merged to 'next' on 2026-05-20 at 38c9fb15c2)
 + trailer: change strbuf in-place in unfold_value()

 Code simplification.
 source: <816be07e-2cd6-48fe-ae93-57fa0f2543ed@web.de>


* rs/use-builtin-add-overflow-explicitly-on-clang (2026-05-18) 2 commits
  (merged to 'next' on 2026-05-21 at c223b71079)
 + use __builtin_add_overflow() in st_add() with Clang
 + strbuf: use st_add3() in strbuf_grow()

 Micro optimization of codepaths that compute allocation sizes carefully.
 source: <20260518202502.25682-1-l.s.r@web.de>


* tb/incremental-midx-part-3.3 (2026-05-19) 16 commits
  (merged to 'next' on 2026-05-21 at 6c11c1a739)
 + repack: allow `--write-midx=incremental` without `--geometric`
 + repack: introduce `--write-midx=incremental`
 + repack: implement incremental MIDX repacking
 + packfile: ensure `close_pack_revindex()` frees in-memory revindex
 + builtin/repack.c: convert `--write-midx` to an `OPT_CALLBACK`
 + repack-geometry: prepare for incremental MIDX repacking
 + repack-midx: extract `repack_fill_midx_stdin_packs()`
 + repack-midx: factor out `repack_prepare_midx_command()`
 + midx: expose `midx_layer_contains_pack()`
 + repack: track the ODB source via existing_packs
 + midx: support custom `--base` for incremental MIDX writes
 + midx: introduce `--no-write-chain-file` for incremental MIDX writes
 + midx: use `strvec` for `keep_hashes`
 + midx: build `keep_hashes` array in order
 + midx: use `strset` for retained MIDX files
 + midx-write: handle noop writes when converting incremental chains

 The repacking code has been refactored, compaction of MIDX layers
 has been implemented, and an incremental strategy that does not
 require all-into-one repacking has been introduced.
 source: <cover.1779206239.git.me@ttaylorr.com>


* tb/pseudo-merge-bugfixes (2026-05-11) 9 commits
  (merged to 'next' on 2026-05-19 at ecee155d5c)
 + pack-bitmap: prevent pattern leak on pseudo-merge re-assignment
 + Documentation: fix broken `sampleRate` in gitpacking(7)
 + pack-bitmap: reject pseudo-merge "sampleRate" of 0
 + pack-bitmap: parse commits in `find_pseudo_merge_group_for_ref()`
 + pack-bitmap: fix pseudo-merge lookup for shared commits
 + pack-bitmap: fix inverted binary search in `pseudo_merge_at()`
 + pack-bitmap-write: sort pseudo-merge commit lookup table in pack order
 + t5333: demonstrate various pseudo-merge bugs
 + t/helper: add 'test-tool bitmap write' subcommand
 (this branch is used by tb/bitmap-build-performance.)

 Fixes many bugs in pseudo-merge code.
 source: <cover.1778546804.git.me@ttaylorr.com>

--------------------------------------------------
[New Topics]

* za/completion-hide-dotfiles (2026-05-26) 1 commit
 - completion: hide dotfiles for selected path completion

 The path completion for commands like `git rm` and `git mv` is being
 updated to hide dotfiles by default, unless the user explicitly starts
 the path with a dot, matching standard shell-completion behavior.

 Will merge to 'next'?
 source: <pull.2311.v2.git.git.1779808987825.gitgitgadget@gmail.com>

--------------------------------------------------
[Cooking]

* kk/fetch-store-ref-optimization (2026-05-24) 1 commit
 - fetch: pass transport to post-fetch connectivity check

 When fetching from a transport that provides a self-contained pack,
 pass the transport pointer to the post-fetch `check_connected()` call
 to optimize the connectivity check.

 Will merge to 'next'?
 source: <pull.2123.git.1779625693328.gitgitgadget@gmail.com>


* ds/restore-sparse-index (2026-05-26) 2 commits
 - restore: avoid sparse index expansion
 - t1092: test 'git restore' with sparse index

 'git restore --staged' has been optimized to avoid unnecessarily expanding
 the sparse index when operating on paths within the sparse checkout
 definition, by handling sparse directory entries at the tree level.

 Will merge to 'next'.
 source: <pull.2121.v2.git.1779827195.gitgitgadget@gmail.com>


* kk/commit-reach-optim (2026-05-25) 3 commits
 - commit-reach: replace queue_has_nonstale() scan with O(1) tracking
 - commit-reach: deduplicate queue entries in paint_down_to_common
 - object.h: fix stale entries in object flag allocation table

 The check for non-stale commits in the priority queue used by
 `paint_down_to_common` and `ahead_behind` has been optimized by
 replacing an O(N) scan with an O(1) counter, yielding performance
 improvements in repositories with wide histories.

 Will merge to 'next'?
 source: <pull.2124.v2.git.1779719286.gitgitgadget@gmail.com>


* ar/receive-pack-worktree-env (2026-05-25) 1 commit
  (merged to 'next' on 2026-05-27 at 9c246d1969)
 + receive-pack: fix updateInstead with core.worktree

 The GIT_WORK_TREE variable prepared to invoke the push-to-checkout
 hook was leaking into the environment even when there was no hook
 used and broke the default push-to-deploy (i.e., let "git checkout"
 update the working tree only when the working tree is clean).

 Will merge to 'master'.
 source: <20260525162311.66240-2-hi@alyssa.is>


* ib/doc-push-default-simple (2026-05-25) 1 commit
 - doc: clarify push.default=simple behavior

 The documentation for `push.default = simple` has been clarified to
 better explain its behavior, making it clear that it pushes the
 current branch to a same-named branch on the remote, and detailing
 the upstream requirements for centralized workflows.

 Comments?
 source: <pull.2115.v2.git.1779767888508.gitgitgadget@gmail.com>


* jc/doc-monitor-ghci (2026-05-24) 1 commit
 - SubmittingPatches: proactively monitor GHCI pages

 Encourage original authors to monitor the CI status.

 Will merge to 'next'?
 source: <xmqq1pf0gpp3.fsf@gitster.g>


* ec/commit-fixup-options (2026-05-26) 2 commits
 - commit: allow -c/-C for all kinds of --fixup
 - commit: allow -m/-F for all kinds of --fixup

 The -m/-F/-c/-C options to supply commit log message from outside the
 editor are now supported for all "git commit --fixup" variations.

 Comments?
 source: <cover.1779792311.git.erik@cervined.in>


* gh/jump-auto-mode (2026-05-21) 1 commit
 - git-jump: pick a mode automatically when invoked without arguments

 The 'git-jump' command (in contrib/) has been taught to automatically
 pick a mode (merge, diff, or ws) when invoked without arguments.

 Comments?
 source: <pull.2108.v3.git.1779371110195.gitgitgadget@gmail.com>


* sp/doc-range-diff-takes-notes (2026-05-20) 1 commit
  (merged to 'next' on 2026-05-22 at 020bec81b7)
 + Documentation/git-range-diff: add missing notes options in synopsis

 Docfix.

 Will merge to 'master'.
 source: <20260521052841.73775-1-siddh.raman.pant@oracle.com>


* ps/odb-source-loose (2026-05-21) 19 commits
 - odb/source-loose: drop pointer to the "files" source
 - odb/source-loose: stub out remaining callbacks
 - odb/source-loose: wire up `write_object_stream()` callback
 - object-file: refactor writing objects to use loose source
 - odb/source-loose: wire up `write_object()` callback
 - loose: refactor object map to operate on `struct odb_source_loose`
 - odb/source-loose: wire up `freshen_object()` callback
 - odb/source-loose: drop `odb_source_loose_has_object()`
 - odb/source-loose: wire up `count_objects()` callback
 - odb/source-loose: wire up `find_abbrev_len()` callback
 - odb/source-loose: wire up `for_each_object()` callback
 - odb/source-loose: wire up `read_object_stream()` callback
 - odb/source-loose: wire up `read_object_info()` callback
 - odb/source-loose: wire up `close()` callback
 - odb/source-loose: wire up `reprepare()` callback
 - odb/source-loose: start converting to a proper `struct odb_source`
 - odb/source-loose: store pointer to "files" instead of generic source
 - odb/source-loose: move loose source into "odb/" subsystem
 - Merge branch 'ps/odb-in-memory' into ps/odb-source-loose

 The loose object source has been refactored into a proper `struct
 odb_source`.

 Comments?
 source: <20260521-b4-pks-odb-source-loose-v1-0-6553b399be2d@pks.im>


* ps/setup-centralize-odb-creation (2026-05-25) 9 commits
 - setup: construct object database in `apply_repository_format()`
 - repository: stop reading loose object map twice on repo init
 - setup: stop initializing object database without repository
 - setup: stop creating the object database in `setup_git_env()`
 - repository: stop initializing the object database in `repo_set_gitdir()`
 - setup: deduplicate logic to apply repository format
 - setup: drop `setup_git_env()`
 - t0001: plug test gaps for git-init(1) with GIT_OBJECT_DIRECTORY
 - Merge branch 'ps/setup-wo-the-repository' into ps/setup-centralize-odb-creation

 The setup logic to discover and configure repositories has been
 refactored, and the initialization of the object database has been
 centralized.

 Comments?
 source: <20260526-b4-pks-setup-centralize-odb-creation-v2-0-2fa5b385c13e@pks.im>


* ps/gitlab-ci-macOS-improvements (2026-05-21) 2 commits
  (merged to 'next' on 2026-05-22 at aaa3c7021e)
 + gitlab-ci: update macOS image
 + gitlab-ci: upgrade macOS runners

 Update GitLab CI jobs that exercise macOS.

 Will merge to 'master'.
 source: <20260521-b4-pks-gitlab-ci-updates-v1-0-53bb46ed33e0@pks.im>


* kh/doc-hook (2026-05-21) 4 commits
  (merged to 'next' on 2026-05-25 at 5e41d13adf)
 + doc: hook: don’t self-link via config include
 + doc: config: include existing git-hook(1) section
 + doc: hook: consistently capitalize Git
 + doc: hook: remove stray backtick

 Doc updates.

 Will merge to 'master'.
 cf. <2832179.mvXUDI8C0e@piment-oiseau>
 source: <CV_doc_hook.6f0@msgid.xyz>


* kh/doc-replay-config (2026-05-21) 4 commits
 - doc: replay: move “default” to the right-hand-side
 - doc: replay: use a nested definition list
 - doc: replay: simplify replay.refAction description
 - doc: link to config for git-replay(1)

 Doc update for "git replay" to actually refer to its configuration
 variables.

 Comments?
 source: <CV_doc_replay_config.709@msgid.xyz>


* jk/commit-graph-lazy-load-fallback (2026-05-18) 1 commit
  (merged to 'next' on 2026-05-22 at d1188df466)
 + commit: fall back to full read when maybe_tree is NULL

 The logic to lazy-load trees from the commit-graph has been made
 more robust by falling back to reading the commit object when
 the commit-graph is no longer available.

 Will merge to 'master'.
 source: <20260519061534.GA1709881@coredump.intra.peff.net>


* jk/connect-service-enum (2026-05-21) 2 commits
  (merged to 'next' on 2026-05-24 at 293561cbc5)
 + transport-helper: fix typo in BUG() message
  (merged to 'next' on 2026-05-21 at fd80c61e21)
 + connect: use "service" enum for "name" argument

 The "name" argument in git_connect() and related functions has been
 converted to a "service" enum to improve type safety and clarify its
 purpose.

 Will merge to 'master'.
 source: <20260519052219.GA1703179@coredump.intra.peff.net>
 source: <20260522044352.GA861761@coredump.intra.peff.net>


* aj/stash-patch-optimize-temporary-index (2026-05-22) 1 commit
 - stash: reuse cached index entries in --patch temporary index

 "git stash -p" has been optimized by reusing cached index
 entries in its temporary index, avoiding unnecessary lstat()
 calls on unchanged files.

 Will merge to 'next'?
 source: <pull.2306.v2.git.git.1779491545531.gitgitgadget@gmail.com>


* tb/bitmap-build-performance (2026-05-19) 9 commits
 - pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
 - pack-bitmap: remember pseudo-merge parents
 - pack-bitmap: sort bitmaps before XORing
 - pack-bitmap: cache object positions during fill
 - pack-bitmap: consolidate `find_object_pos()` success path
 - pack-bitmap: reuse stored selected bitmaps
 - pack-bitmap: check subtree bits before recursing
 - pack-bitmap: pass object position to `fill_bitmap_tree()`
 - Merge branch 'tb/pseudo-merge-bugfixes' into tb/bitmap-build-performance

 Reachability bitmap generation has been significantly optimized. By
 reordering tree traversal, caching object positions, and refining how
 pseudo-merge bitmaps are constructed, the performance of "git repack
 --write-midx-bitmaps" is improved, especially for large repositories
 and when using pseudo-merges.

 Comments?
 source: <cover.1779207127.git.me@ttaylorr.com>


* hn/status-pull-advice-qualified (2026-05-21) 1 commit
 - remote: qualify "git pull" advice for non-upstream compareBranches

 Advice shown by "git status" when the local branch is behind or has
 diverged from its push branch has been updated to suggest "git pull
 <remote> <branch>".

 Comments?
 source: <pull.2301.v4.git.git.1779372367317.gitgitgadget@gmail.com>


* rs/strbuf-add-uint (2026-05-12) 4 commits
 - ls-tree: use strbuf_add_uint()
 - ls-files: use strbuf_add_uint()
 - cat-file: use strbuf_add_uint()
 - strbuf: add strbuf_add_uint()

 Calls to strbuf_addf("%u") that append a decimal integer are common;
 they have been optimized by using a custom formatter.

 Comments?
 source: <20260512115603.80780-1-l.s.r@web.de>


* ta/approxidate-noon-fix (2026-05-21) 4 commits
  (merged to 'next' on 2026-05-25 at 2dd9ce3c54)
 + approxidate: use deferred mday adjustments for "specials"
 + approxidate: make "specials" respect fixed day-of-month
 + t0006: add support for approxidate test date adjustment
 + approxidate: make "today" wrap to midnight

 "Friday noon" asked in the morning on Sunday was parsed to be one
 day before the specified time, which has been corrected.

 Will merge to 'master'.
 source: <20260521105408.8222-1-taahol@utu.fi>


* mm/doc-word-diff (2026-05-13) 1 commit
 - doc: clarify that --word-diff operates on line-level hunks

 The documentation for "--word-diff" has been extended with a bit of
 implementation detail of where these different words come from.

 Comments?
 source: <pull.2113.git.1778686956622.gitgitgadget@gmail.com>


* rs/strbuf-add-oid-hex (2026-05-13) 1 commit
 - hex: add and use strbuf_add_oid_hex()

 Formatting object name in full hexadecimal form has been optimized
 by using a new strbuf_add_oid_hex() helper function.

 Comments?
 source: <183aa0fd-d455-4ec9-9c42-d511fac8b3e4@web.de>


* ed/check-connected-close-err-fd (2026-05-16) 1 commit
  (merged to 'next' on 2026-05-22 at 00d592399e)
 + Merge branch 'ed/check-connected-close-err-fd-2.53' into ed/check-connected-close-err-fd
 (this branch uses ed/check-connected-close-err-fd-2.53.)

 File descriptor leak fix.

 Will merge to 'master'.
 (this branch uses ed/check-connected-close-err-fd-2.53.)


* ed/check-connected-close-err-fd-2.53 (2026-05-14) 1 commit
  (merged to 'next' on 2026-05-22 at 1017d0e022)
 + connected: close err_fd in promisor fast-path
 (this branch is used by ed/check-connected-close-err-fd.)

 File descriptor leak fix (for 2.53 maintenance track).

 Will merge to 'master'.
 source: <pull.2303.git.git.1778827194448.gitgitgadget@gmail.com>


* kk/tips-reachable-from-bases-optim (2026-05-16) 2 commits
  (merged to 'next' on 2026-05-22 at 87d6b8e666)
 + t6600: add tests for duplicate tips in tips_reachable_from_bases()
 + commit-reach: use object flags for tips_reachable_from_bases()

 Revision traversal optimization.

 Will merge to 'master'.
 source: <pull.2116.v3.git.1778947182.gitgitgadget@gmail.com>


* tc/generate-configlist-fix-for-older-ninja (2026-05-15) 1 commit
  (merged to 'next' on 2026-05-22 at 8322bfb8f2)
 + generate-configlist: collapse depfile for older Ninja

 Build update.

 Will merge to 'master'.
 source: <20260515-toon-fix-almalinux8-v3-1-b545a0647f0f@iotcl.com>


* hn/config-typo-advice (2026-05-25) 1 commit
 - config: suggest the correct form when key contains "=" in set context

 "git config foo.bar=baz" is not likely to be a request to read the
 value of such a variable with '=' in its name; rather it is plausible
 that the user meant "git config set foo.bar baz".  Give advice when
 giving an error message.

 Comments?
 source: <pull.2302.v3.git.git.1779697995418.gitgitgadget@gmail.com>


* ja/doc-synopsis-style-again (2026-05-25) 6 commits
 - doc: convert git-imap-send synopsis and options to new style
 - doc: convert git-apply synopsis and options to new style
 - doc: convert git-am synopsis and options to new style
 - doc: convert git-grep synopsis and options to new style
 - doc: git bisect: clarify the usage of the synopsis vs actual command
 - doc: convert git-bisect to synopsis style

 A batch of documentation pages has been updated to use the modern
 synopsis style.

 Will merge to 'next'?
 source: <pull.2117.v2.git.1779704908.gitgitgadget@gmail.com>


* jt/config-lock-timeout (2026-05-17) 1 commit
 - config: retry acquiring config.lock, configurable via core.configLockTimeout

 Configuration file locking now retries for a short period, avoiding
 failures when multiple processes attempt to update the configuration
 simultaneously.

 Comments?
 cf. <xmqqzf1xbl4i.fsf@gitster.g>
 source: <20260517132111.1014901-1-joerg@thalheim.io>


* hn/branch-prune-merged (2026-05-22) 6 commits
 - branch: add --dry-run for --prune-merged
 - branch: add branch.<name>.pruneMerged opt-out
 - branch: add --prune-merged <branch>
 - branch: prepare delete_branches for a bulk caller
 - branch: let delete_branches warn instead of error on bulk refusal
 - branch: add --forked <branch>

 "git branch" command learned "--prune-merged" option to remove
 local branches that have already been merged to the remote-tracking
 branches they track.

 Comments?
 source: <pull.2285.v11.git.git.1779449498.gitgitgadget@gmail.com>


* st/daemon-sockaddr-fixes (2026-05-14) 3 commits
 - daemon: guard NULL REMOTE_PORT in execute() logging
 - daemon: fix IPv6 address truncation in ip2str()
 - daemon: fix IPv6 address corruption in lookup_hostname()

 Correct use of sockaddr API in "git daemon".

 Waiting for response(s) to review comment(s).
 cf. <agGLRC1ziF5F8Okh@pks.im>
 source: <pull.2300.git.git.1778773592.gitgitgadget@gmail.com>


* ob/more-repo-config-values (2026-04-23) 8 commits
 - env: move "warn_on_object_refname_ambiguity" into `struct repo_config_values`
 - env: move "sparse_expect_files_outside_of_patterns" into `repo_config_values`
 - env: move "core_sparse_checkout_cone" into `struct repo_config_values`
 - environment: move "precomposed_unicode" into `struct repo_config_values`
 - environment: move "pack_compression_level" into `struct repo_config_values`
 - environment: move `zlib_compression_level` into `struct repo_config_values`
 - environment: move "check_stat" into `struct repo_config_values`
 - environment: move "trust_ctime" into `struct repo_config_values`

 Expecting a reroll.
 cf. <CAD=f0L8-_3sDGGkCzF4WA0xmUtaY_qiz__3zq5AemLgwTsqvsg@mail.gmail.com>
 cf. <xmqqlddqu013.fsf@gitster.g>
 source: <20260423165432.143598-1-belkid98@gmail.com>


* cc/promisor-auto-config-url-more (2026-05-19) 9 commits
 - doc: promisor: improve acceptFromServer entry
 - promisor-remote: auto-configure unknown remotes
 - promisor-remote: trust known remotes matching acceptFromServerUrl
 - promisor-remote: introduce promisor.acceptFromServerUrl
 - promisor-remote: add 'local_name' to 'struct promisor_info'
 - urlmatch: add url_normalize_pattern() helper
 - urlmatch: change 'allow_globs' arg to bool
 - t5710: simplify 'mkdir X' followed by 'git -C X init'
 - Merge branch 'cc/promisor-auto-config-url' into cc/promisor-auto-config-url-more

 The handling of promisor-remote protocol capability has been
 loosened to allow the other side to add to the list of promisor
 remotes via the promisor.acceptFromServerURL configuration
 variable.

 Comments?
 source: <20260519153808.494105-1-christian.couder@gmail.com>


* hn/checkout-track-fetch (2026-05-23) 2 commits
 - checkout: extend --track with a "fetch" mode to refresh start-point
 - branch: expose helpers for finding the remote owning a tracking ref

 "git checkout --track=..." learned to optionally fetch the branch
 from the remote the new branch will work with.

 Comments?
 source: <pull.2281.v13.git.git.1779565714.gitgitgadget@gmail.com>


* mf/revision-max-count-oldest (2026-05-18) 1 commit
 - revision.c: implement --max-count-oldest

 "git rev-list" (and "git log" family of commands) learned a new "--max-count-oldest"
 option that picks the oldest N commits in the range instead of the usual newest.

 Comments?
 source: <8210d60832b9a58aa4d71fc3790e44d8989564ce.1779152064.git.mroik@delayed.space>


* mm/line-log-cleanup (2026-05-25) 3 commits
 - line-log: allow non-patch diff formats with -L
 - line-log: integrate -L output with the standard log-tree pipeline
 - revision: move -L setup before output_format-to-diff derivation

 The `git log -L` implementation has been refactored to use the
 standard diff output pipeline, enabling pickaxe and diff-filter to
 work as expected. Additionally, metadata-only diff formats like
 --raw and --name-only are now supported with -L.

 Will merge to 'next'?
 source: <pull.2120.v2.git.1779733799.gitgitgadget@gmail.com>


* ds/path-walk-filters (2026-05-22) 14 commits
  (merged to 'next' on 2026-05-25 at eccb829b10)
 + path-walk: support `combine` filter
 + path-walk: support `object:type` filter
 + path-walk: support `tree:0` filter
 + t6601: tag otherwise-unreachable trees
 + pack-objects: support sparse:oid filter with path-walk
 + path-walk: add pl_sparse_trees to control tree pruning
 + path-walk: support blob size limit filter
 + backfill: die on incompatible filter options
 + path-walk: support blobless filter
 + path-walk: always emit directly-requested objects
 + t/perf: add pack-objects filter and path-walk benchmark
 + pack-objects: pass --objects with --path-walk
 + t5620: make test work with path-walk var
 + Merge branch 'en/backfill-fixes-and-edges' into ds/path-walk-filters

 The "git pack-objects --path-walk" traversal has been integrated
 with several object filters, including blobless and sparse filters.

 Will merge to 'master'.
 source: <pull.2101.v5.git.1779474277.gitgitgadget@gmail.com>


* en/ort-harden-against-corrupt-trees (2026-04-20) 5 commits
 - cache-tree: fix verify_cache() to catch non-adjacent D/F conflicts
 - merge-ort: abort merge when trees have duplicate entries
 - merge-ort: free diff pairs queue in clear_or_reinit_internal_opts()
 - merge-ort: drop unnecessary show_all_errors from collect_merge_info()
 - merge-ort: propagate callback errors from traverse_trees_wrapper()

 "ort" merge backend handles merging corrupt trees better by
 aborting when it should.

 Needs review.
 source: <pull.2096.git.1776731171.gitgitgadget@gmail.com>


* pw/status-rebase-todo (2026-05-01) 2 commits
 - status: improve rebase todo list parsing
 - sequencer: factor out parsing of todo commands

 The display of the rebase todo list in "git status" has been
 improved to correctly abbreviate object IDs for more commands and
 avoid misinterpreting refs as object IDs.

 Needs review.
 source: <cover.1777648598.git.phillip.wood@dunelm.org.uk>


* cl/conditional-config-on-worktree-path (2026-05-24) 2 commits
 - config: add "worktree" and "worktree/i" includeIf conditions
 - config: refactor include_by_gitdir() into include_by_path()

 The [includeIf "condition"] conditional inclusion facility for
 configuration files has learned to use the location of worktree
 in its condition.

 Ready?
 source: <20260525-includeif-worktree-v5-0-1efe525d025a@black-desk.cn>


* ps/shift-root-in-graph (2026-04-27) 1 commit
 - graph: add indentation for commits preceded by a parentless commit

 In a history with more than one root commit, "git log --graph
 --oneline" stuffed an unrelated commit immediately below a root
 commit, which has been corrected by making the spot below a root
 unavailable.

 Waiting for response(s) to review comment(s).
 cf. <20260513230216.GA1378627@coredump.intra.peff.net>
 source: <20260427102838.44867-2-pabloosabaterr@gmail.com>


* lp/repack-propagate-promisor-debugging-info (2026-04-18) 6 commits
 - repack-promisor: add missing headers
 - t7703: test for promisor file content after geometric repack
 - t7700: test for promisor file content after repack
 - repack-promisor: preserve content of promisor files after repack
 - repack-promisor add helper to fill promisor file after repack
 - pack-write: add explanation to promisor file content

 When fetching objects into a lazily cloned repository, .promisor
 files are created with information meant to help debugging.  "git
 repack" has been taught to carry this information forward to
 packfiles that are newly created.

 Needs review.
 cf. <xmqqse7xm8av.fsf@gitster.g>
 source: <cover.1776384902.git.lorenzo.pegorari2002@gmail.com>


* th/promisor-quiet-per-repo (2026-04-06) 1 commit
 - promisor-remote: fix promisor.quiet to use the correct repository

 The "promisor.quiet" configuration variable was not used from
 relevant submodules when commands like "grep --recurse-submodules"
 triggered a lazy fetch, which has been corrected.

 Comments?
 source: <20260406183041.783800-1-vikingtc4@gmail.com>


* sa/cat-file-batch-mailmap-switch (2026-04-15) 1 commit
  (merged to 'next' on 2026-05-22 at 197a9bad73)
 + cat-file: add mailmap subcommand to --batch-command

 "git cat-file --batch" learns an in-line command "mailmap"
 that lets the user toggle use of mailmap.

 Will merge to 'master'.
 cf. <xmqqwlwy4v7t.fsf@gitster.g>
 source: <20260416033250.4327-2-siddharthasthana31@gmail.com>


* jd/unpack-trees-wo-the-repository (2026-03-31) 2 commits
 - unpack-trees: use repository from index instead of global
 - unpack-trees: use repository from index instead of global

 A handful of inappropriate uses of the_repository have been
 rewritten to use the right repository structure instance in the
 unpack-trees.c codepath.

 Comments?
 source: <pull.2258.v2.git.git.1774971267.gitgitgadget@gmail.com>


* kh/doc-trailers (2026-04-13) 9 commits
 - doc: interpret-trailers: document comment line treatment
 - doc: interpret-trailers: commit to “trailer block” term
 - doc: interpret-trailers: add key format example
 - doc: interpret-trailers: explain key format
 - doc: interpret-trailers: explain the format after the intro
 - doc: interpret-trailers: not just for commit messages
 - doc: interpret-trailers: use “metadata” in Name as well
 - doc: interpret-trailers: replace “lines” with “metadata”
 - doc: interpret-trailers: stop fixating on RFC 822

 Documentation updates.

 Needs review.
 cf. <xmqq1pfivfa3.fsf@gitster.g>
 source: <V2_CV_doc_int-tr_key_format.613@msgid.xyz>


* ps/graph-lane-limit (2026-03-27) 3 commits
  (merged to 'next' on 2026-05-22 at ca1c5e8432)
 + graph: add truncation mark to capped lanes
 + graph: add --graph-lane-limit option
 + graph: limit the graph width to a hard-coded max

 The graph output from commands like "git log --graph" can now be
 limited to a specified number of lanes, preventing overly wide output
 in repositories with many branches.

 Will merge to 'master'.
 cf. <bdff0a5d-b738-4053-9b72-08eba88156de@kdbg.org>
 source: <20260328001113.1275291-1-pabloosabaterr@gmail.com>


* jr/bisect-custom-terms-in-output (2026-05-14) 3 commits
  (merged to 'next' on 2026-05-22 at 1ccd1056c9)
 + rev-parse: use selected alternate terms to look up refs
 + bisect: print bisect terms in single quotes
 + bisect: use selected alternate terms in status output

 "git bisect" now uses the selected terms (e.g., old/new) more
 consistently in its output.

 Will merge to 'master'.
 source: <20260514-bisect-terms-v4-0-b3e3cf1b06ce@schlaraffenlan.de>


* ua/push-remote-group (2026-05-03) 3 commits
 - push: support pushing to a remote group
 - remote: move remote group resolution to remote.c
 - remote: fix sign-compare warnings in push_cas_option

 "git push" learned to take a "remote group" name to push to, which
 causes pushes to multiple places, just like "git fetch" would do.

 Comments?
 source: <20260503153402.1333220-1-usmanakinyemi202@gmail.com>


* js/parseopt-subcommand-autocorrection (2026-04-27) 11 commits
 - SQUASH???
 - doc: document autocorrect API
 - parseopt: add tests for subcommand autocorrection
 - parseopt: enable subcommand autocorrection for git-remote and git-notes
 - parseopt: autocorrect mistyped subcommands
 - autocorrect: provide config resolution API
 - autocorrect: rename AUTOCORRECT_SHOW to AUTOCORRECT_HINT
 - autocorrect: use mode and delay instead of magic numbers
 - help: move tty check for autocorrection to autocorrect.c
 - help: make autocorrect handling reusable
 - parseopt: extract subcommand handling from parse_options_step()

 The parse-options library learned to auto-correct misspelled
 subcommand names.

 Expecting a reroll.
 cf. <xmqqcxz2tzpr.fsf@gitster.g>
 source: <SY0P300MB0801677A2A1E0FD38D06A841CE2A2@SY0P300MB0801.AUSP300.PROD.OUTLOOK.COM>


* jc/neuter-sideband-post-3.0 (2026-03-05) 2 commits
 - sideband: delay sanitizing by default to Git v3.0
 - Merge branch 'jc/neuter-sideband-fixup' into jc/neuter-sideband-post-3.0

 The final step, split from earlier attempt by Dscho, to loosen the
 sideband restriction for now and tighten later at Git v3.0 boundary.

 On hold to help the base topic with wider exposure.
 (this branch uses jc/neuter-sideband-fixup.)
 source: <20260305233452.3727126-8-gitster@pobox.com>


* cs/subtree-split-recursion (2026-03-05) 3 commits
 - contrib/subtree: reduce recursion during split
 - contrib/subtree: functionalize split traversal
 - contrib/subtree: reduce function side-effects

 When processing large history graphs on Debian or Ubuntu, "git
 subtree" can die with a "recursion depth reached" error.

 Comments?
 source: <20260305-cs-subtree-split-recursion-v2-0-7266be870ba9@howdoi.land>


* pt/fsmonitor-linux (2026-04-15) 13 commits
  (merged to 'next' on 2026-05-22 at 5d99c1765d)
 + fsmonitor: convert shown khash to strset in do_handle_client
 + fsmonitor: add tests for Linux
 + fsmonitor: add timeout to daemon stop command
 + fsmonitor: close inherited file descriptors and detach in daemon
 + run-command: add close_fd_above_stderr option
 + fsmonitor: implement filesystem change listener for Linux
 + fsmonitor: rename fsm-settings-darwin.c to fsm-settings-unix.c
 + fsmonitor: rename fsm-ipc-darwin.c to fsm-ipc-unix.c
 + fsmonitor: use pthread_cond_timedwait for cookie wait
 + compat/win32: add pthread_cond_timedwait
 + fsmonitor: fix hashmap memory leak in fsmonitor_run_daemon
 + fsmonitor: fix khash memory leak in do_handle_client
 + t9210, t9211: disable GIT_TEST_SPLIT_INDEX for scalar clone tests

 The fsmonitor daemon has been implemented for Linux.

 Will merge to 'master'.
 cf. <xmqqa4u5nnxq.fsf@gitster.g>
 source: <pull.2147.v15.git.git.1776259657.gitgitgadget@gmail.com>

^ permalink raw reply

* Re: [PATCH 6/8] pack-bitmap: sort bitmaps before XORing
From: Taylor Blau @ 2026-05-27 16:56 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
In-Reply-To: <20260527100406.GG981444@coredump.intra.peff.net>

On Wed, May 27, 2026 at 06:04:06AM -0400, Jeff King wrote:
> On Tue, May 19, 2026 at 12:12:50PM -0400, Taylor Blau wrote:
>
> > Reachability bitmaps may be stored as XORs against nearby bitmaps, up to
> > 10 away. However, when callers provide selected commits in an arbitrary
> > order, the writer may miss good ancestor/descendant pairs and produce
> > much larger bitmap files without changing query coverage.
> >
> > Sort the selected bitmaps in date order (from oldest to newest) before
> > computing XOR offsets, leaving pseudo-merge bitmaps alone (which we will
> > deal with separately in following commits).
>
> That order certainly makes the most sense. I'd have thought we ended up
> there incidentally because of the order in which we consider the
> commits, but perhaps not. I wonder if this got much worse when we
> re-wrote the bitmap generation code a few years ago.
>
> That was in v2.31.0, I think. Repacking linux.git with bitmaps, though,
> I couldn't find any difference in size between v2.30 and v2.31. They're
> both ~67M. But that also didn't shrink with this patch, either.
>
> If you have some spare CPU cycles to burn, I would be interested in a
> comparison of the bitmap size of your test repo using v2.30.0, v2.31.1,
> and this patch.

I started running this experiment, but I don't think I actually have
enough CPU cycles to let it finish ;-). Pre-v2.31 bitmap generation is
*really* slow[^1], and after multiple hours (forcing the same selection
of bitmaps by back-porting and adjusting 'test-tool bitmap') I couldn't
seem to make any meaningful progress.

I'm sure that you could get some plausible numbers out of benchmarking
this on a smaller repository. In case you're interested, here's the
patch I wrote on top of v2.30.0:

--- 8< ---
diff --git a/Makefile b/Makefile
index 7b64106930a..9ce9f2b483c 100644
--- a/Makefile
+++ b/Makefile
@@ -690,6 +690,7 @@ X =
 PROGRAMS += $(patsubst %.o,git-%$X,$(PROGRAM_OBJS))

 TEST_BUILTINS_OBJS += test-advise.o
+TEST_BUILTINS_OBJS += test-bitmap.o
 TEST_BUILTINS_OBJS += test-bloom.o
 TEST_BUILTINS_OBJS += test-chmtime.o
 TEST_BUILTINS_OBJS += test-config.o
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 5e998bdaa79..55dbb475120 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -113,7 +113,7 @@ void bitmap_writer_build_type_index(struct packing_data *to_pack,
 static struct object **seen_objects;
 static unsigned int seen_objects_nr, seen_objects_alloc;

-static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitmap *reused)
+void bitmap_writer_push_commit(struct commit *commit, struct ewah_bitmap *reused)
 {
 	if (writer.selected_nr >= writer.selected_alloc) {
 		writer.selected_alloc = (writer.selected_alloc + 32) * 2;
@@ -402,7 +402,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,

 	if (indexed_commits_nr < 100) {
 		for (i = 0; i < indexed_commits_nr; ++i)
-			push_bitmapped_commit(indexed_commits[i], NULL);
+			bitmap_writer_push_commit(indexed_commits[i], NULL);
 		return;
 	}

@@ -440,7 +440,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
 			}
 		}

-		push_bitmapped_commit(chosen, reused_bitmap);
+		bitmap_writer_push_commit(chosen, reused_bitmap);

 		i += next + 1;
 		display_progress(writer.progress, i);
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 1203120c432..a882efdb16a 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -74,6 +74,8 @@ void bitmap_writer_build_type_index(struct packing_data *to_pack,
 				    struct pack_idx_entry **index,
 				    uint32_t index_nr);
 void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack);
+void bitmap_writer_push_commit(struct commit *commit,
+			       struct ewah_bitmap *reused);
 void bitmap_writer_select_commits(struct commit **indexed_commits,
 		unsigned int indexed_commits_nr, int max_bitmaps);
 void bitmap_writer_build(struct packing_data *to_pack);
diff --git a/t/helper/test-bitmap.c b/t/helper/test-bitmap.c
new file mode 100644
index 00000000000..9be03377c06
--- /dev/null
+++ b/t/helper/test-bitmap.c
@@ -0,0 +1,119 @@
+#define USE_THE_REPOSITORY_VARIABLE
+
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "commit.h"
+#include "packfile.h"
+#include "pack-bitmap.h"
+
+static int add_packed_object(const struct object_id *oid,
+			     struct packed_git *pack,
+			     uint32_t pos,
+			     void *_data)
+{
+	struct packing_data *packed = _data;
+	struct object_entry *entry;
+	struct object_info oi = OBJECT_INFO_INIT;
+	enum object_type type;
+
+	oi.typep = &type;
+
+	entry = packlist_alloc(packed, oid);
+	entry->in_pack_offset = nth_packed_object_offset(pack, pos);
+	entry->idx.offset = entry->in_pack_offset;
+	if (packed_object_info(the_repository, pack, entry->in_pack_offset, &oi) < 0)
+		die("could not get type of object %s",
+		    oid_to_hex(oid));
+	oe_set_type(entry, type);
+	oe_set_in_pack(packed, entry, pack);
+
+	return 0;
+}
+
+static int idx_oid_cmp(const void *va, const void *vb)
+{
+	const struct pack_idx_entry *a = *(const struct pack_idx_entry **)va;
+	const struct pack_idx_entry *b = *(const struct pack_idx_entry **)vb;
+
+	return oidcmp(&a->oid, &b->oid);
+}
+
+static int bitmap_write(const char *basename)
+{
+	struct packed_git *p = NULL;
+	struct packing_data packed = { 0 };
+	struct pack_idx_entry **index;
+	struct strbuf buf = STRBUF_INIT;
+	uint32_t i;
+
+	prepare_repo_settings(the_repository);
+	for (p = get_all_packs(the_repository); p; p = p->next) {
+		if (!strcmp(pack_basename(p), basename))
+			break;
+	}
+
+	if (!p)
+		die("could not find pack '%s'", basename);
+
+	if (open_pack_index(p))
+		die("cannot open pack index for '%s'", p->pack_name);
+
+	prepare_packing_data(the_repository, &packed);
+
+	for_each_object_in_pack(p, add_packed_object, &packed,
+				FOR_EACH_OBJECT_PACK_ORDER);
+
+	/*
+	 * Build the index array now that data.packed.objects[] is
+	 * fully allocated (packlist_alloc() may have reallocated it
+	 * during the loop above).
+	 */
+	ALLOC_ARRAY(index, p->num_objects);
+	for (i = 0; i < p->num_objects; i++)
+		index[i] = &packed.objects[i].idx;
+
+	bitmap_writer_build_type_index(&packed, index, p->num_objects);
+
+	while (strbuf_getline_lf(&buf, stdin) != EOF) {
+		struct object_id oid;
+		struct commit *c;
+
+		if (get_oid_hex(buf.buf, &oid))
+			die("invalid OID: %s", buf.buf);
+
+		c = lookup_commit(the_repository, &oid);
+		if (!c || repo_parse_commit(the_repository, c))
+			die("could not parse commit %s", buf.buf);
+
+		bitmap_writer_push_commit(c, NULL);
+	}
+
+	bitmap_writer_build(&packed);
+
+	bitmap_writer_set_checksum(p->hash);
+
+	QSORT(index, p->num_objects, idx_oid_cmp);
+
+	strbuf_reset(&buf);
+	strbuf_addstr(&buf, p->pack_name);
+	strbuf_strip_suffix(&buf, ".pack");
+	strbuf_addstr(&buf, ".bitmap");
+	bitmap_writer_finish(index, p->num_objects, buf.buf, 0);
+
+	strbuf_release(&buf);
+	free(index);
+
+	return 0;
+}
+
+int cmd__bitmap(int argc, const char **argv)
+{
+	setup_git_directory();
+
+	if (argc == 3 && !strcmp(argv[1], "write"))
+		return bitmap_write(argv[2]);
+
+	usage("\ttest-tool bitmap write <pack-basename> < <commit-list>");
+
+	return -1;
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 9d6d14d9293..c43d8c0977b 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -15,6 +15,7 @@ struct test_cmd {

 static struct test_cmd cmds[] = {
 	{ "advise", cmd__advise_if_enabled },
+	{ "bitmap", cmd__bitmap },
 	{ "bloom", cmd__bloom },
 	{ "chmtime", cmd__chmtime },
 	{ "config", cmd__config },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index a6470ff62c4..27e6e40ffcb 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -5,6 +5,7 @@
 #include "git-compat-util.h"

 int cmd__advise_if_enabled(int argc, const char **argv);
+int cmd__bitmap(int argc, const char **argv);
 int cmd__bloom(int argc, const char **argv);
 int cmd__chmtime(int argc, const char **argv);
 int cmd__config(int argc, const char **argv);
--- >8 ---

> > On our same testing repository from previous commits, this change shrunk
> > our selection of 1,261 bitmaps from ~635.46 MiB to 176.4 MiB for a
> > ~72.24% reduction in the on-disk size of our *.bitmap file. The time to
> > generate the smaller bitmap file decreased by ~3.69 seconds, though this
> > is likely mostly noise.
>
> Certainly good numbers. The obvious follow-up question is: how does the
> reading side fare? I'd expect it to be a little better, if only because
> there are fewer bytes to consider when XOR-ing. But if there's some
> hidden assumption we're missing, then it could get wildly worse. It
> would be good to confirm that that didn't happen. ;)

It doesn't make a huge difference. Prior to this patch, the timings on
my test repository for 'git rev-list --count --all --objects
--use-bitmap-index' go from:

 -  2.180 ± 0.019 seconds (with pseudo-merges)
 - 52.149 ± 0.224 seconds (without pseudo-merges)

, and after applying this patch, it changes to:

 -  2.611 ± 0.023 seconds (with pseudo-merges)
 - 51.963 ± 0.131 seconds (without pseudo-merges)

It looks like there is a minor slow-down on pseudo-merges, and a minor
speed-up without them. The difference is small enough that I'm willing
to treat it as run-to-run noise.

> >  static void compute_xor_offsets(struct bitmap_writer *writer)
> >  {
> >  	static const int MAX_XOR_OFFSET_SEARCH = 10;
> >
> >  	int i, next = 0;
> > +	int nr = bitmap_writer_nr_selected_commits(writer);
> > +
> > +	if (nr > 1) {
> > +		QSORT(writer->selected, nr, bitmapped_commit_date_cmp);
> > +
> > +		for (i = 0; i < nr; i++) {
> > +			struct bitmapped_commit *stored = &writer->selected[i];
> > +			khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
> > +							   stored->commit->object.oid);
> > +
> > +			if (hash_pos == kh_end(writer->bitmaps))
> > +				BUG("selected commit missing from bitmap map: %s",
> > +				    oid_to_hex(&stored->commit->object.oid));
> > +
> > +			kh_value(writer->bitmaps, hash_pos) = stored;
> > +		}
> > +	}
>
> OK. It took me a minute to wrap my head around this. The real work is
> done by QSORT(). But because we maintain a hash pointing into that
> array, we have to go through each hash entry and fix up its pointer.

Yup.

> Looks correct.

Thanks,
Taylor

[^1]: ...and I have great empathy for Stolee's suffering here when
  benchmarking his performance improvements to the bitmap generation
  code from back in the day! ;-).

^ permalink raw reply related

* [PATCH] pkt-line: initialize packet_buffer to avoid macOS linker warning
From: Harald Nordgren via GitGitGadget @ 2026-05-27 17:11 UTC (permalink / raw)
  To: git; +Cc: Harald Nordgren, Harald Nordgren

From: Harald Nordgren <haraldnordgren@gmail.com>

Signed-off-by: Harald Nordgren <haraldnordgren@gmail.com>
---
    pkt-line: initialize packet_buffer to avoid macOS linker warning
    
    Removes this warning:
    
    $ make -s -j8
    GIT_VERSION=2.54.0.380.gc69baaf57b
    ld: warning: reducing alignment of section __DATA,__common from 0x8000 to 0x4000 because it exceeds segment maximum alignment
    ld: warning: reducing alignment of section __DATA,__common from 0x8000 to 0x4000 because it exceeds segment maximum alignment
    ld: warning: reducing alignment of section __DATA,__common from 0x8000 to 0x4000 because it exceeds segment maximum alignment
    ld: warning: reducing alignment of section __DATA,__common from 0x8000 to 0x4000 because it exceeds segment maximum alignment
    ld: warning: reducing alignment of section __DATA,__common from 0x8000 to 0x4000 because it exceeds segment maximum alignment
    ld: warning: reducing alignment of section __DATA,__common from 0x8000 to 0x4000 because it exceeds segment maximum alignment
    ld: warning: reducing alignment of section __DATA,__common from 0x8000 to 0x4000 because it exceeds segment maximum alignment
    ld: warning: reducing alignment of section __DATA,__common from 0x8000 to 0x4000 because it exceeds segment maximum alignment
    

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-2313%2FHaraldNordgren%2Fpkt-line-init-buffer-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-2313/HaraldNordgren/pkt-line-init-buffer-v1
Pull-Request: https://github.com/git/git/pull/2313

 pkt-line.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkt-line.c b/pkt-line.c
index 3fc3e9ea70..cfd2799677 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -8,7 +8,7 @@
 #include "trace.h"
 #include "write-or-die.h"
 
-char packet_buffer[LARGE_PACKET_MAX];
+char packet_buffer[LARGE_PACKET_MAX] = {0};
 static const char *packet_trace_prefix = "git";
 static struct trace_key trace_packet = TRACE_KEY_INIT(PACKET);
 static struct trace_key trace_pack = TRACE_KEY_INIT(PACKFILE);

base-commit: c69baaf57ba26cf117c2b6793802877f19738b0d
-- 
gitgitgadget

^ permalink raw reply related

* [PATCH v2 0/3] daemon: fix network address handling bugs
From: Sebastien Tardif via GitGitGadget @ 2026-05-27 18:18 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Sebastien Tardif
In-Reply-To: <pull.2300.git.git.1778773592.gitgitgadget@gmail.com>

Fix three related issues in daemon.c's network address handling:

IPv6 address corruption in lookup_hostname(): getaddrinfo() is called with
AF_UNSPEC hints, so it may return IPv6 results. However, the code
unconditionally casts ai_addr to sockaddr_in and passes AF_INET to
inet_ntop(). On IPv6-only hosts, this reads from the wrong struct offset,
producing garbage IP addresses. Fixed by checking ai_family and handling
both AF_INET and AF_INET6.

IPv6 address truncation in ip2str(): The sockaddr struct size (ai_addrlen)
is passed as the output buffer size to inet_ntop(). For IPv6,
sizeof(sockaddr_in6) is 28 bytes but INET6_ADDRSTRLEN is 46, so long IPv6
addresses are silently truncated. Fixed by passing sizeof(ip) instead, and
dropping the now-unused len parameter.

NULL pointer in execute() logging: REMOTE_PORT environment variable is used
in a format string without a NULL check (only REMOTE_ADDR was checked). If
REMOTE_PORT is unset, NULL is passed to printf's %s, which is undefined
behavior. Fixed by using a fallback string.

Changes since v1:

 * Split the single patch into three separate commits, one per fix, per
   Patrick's review.
 * Deduplicated the address family handling in lookup_hostname(): instead of
   duplicating the inet_ntop() call for each family, the address pointer is
   extracted into a local void *addr variable first, then inet_ntop() is
   called once, per Patrick's suggestion.
 * The (void *) intermediate cast on ai_addr is used intentionally: C
   guarantees any object pointer round-trips safely through void *, and it
   keeps the per-family blocks shorter than spelling out the full struct
   casts.
 * For the REMOTE_PORT NULL guard: both REMOTE_ADDR and REMOTE_PORT are set
   by the same code path in handle(), so neither should be NULL
   independently. The guard makes the code consistent with the existing
   REMOTE_ADDR check and avoids undefined behavior from printf %s with a
   NULL argument.
 * Die on unexpected address families in lookup_hostname() rather than
   silently leaving addrbuf uninitialized.

Sebastien Tardif (3):
  daemon: fix IPv6 address corruption in lookup_hostname()
  daemon: fix IPv6 address truncation in ip2str()
  daemon: guard NULL REMOTE_PORT in execute() logging

 daemon.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)


base-commit: 59ff4886a579f4bc91e976fe18590b9ae02c7a08
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-2300%2FSebTardif%2Ffix%2Fdaemon-ipv6-and-null-port-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-2300/SebTardif/fix/daemon-ipv6-and-null-port-v2
Pull-Request: https://github.com/git/git/pull/2300

Range-diff vs v1:

 1:  b2d8143811 = 1:  b2d8143811 daemon: fix IPv6 address corruption in lookup_hostname()
 2:  5c01ec3cad = 2:  5c01ec3cad daemon: fix IPv6 address truncation in ip2str()
 3:  1b2f9d1a07 ! 3:  e312735716 daemon: guard NULL REMOTE_PORT in execute() logging
     @@ Metadata
       ## Commit message ##
          daemon: guard NULL REMOTE_PORT in execute() logging
      
     -    The REMOTE_PORT environment variable is used in a format string
     -    without a NULL check, while REMOTE_ADDR is checked. If REMOTE_PORT
     -    is unset, NULL is passed to printf's %s, which is undefined behavior.
     +    REMOTE_ADDR and REMOTE_PORT are both set by the same code path in
     +    handle(), so neither should be NULL independently. However, the
     +    existing code checks REMOTE_ADDR before the loginfo() call but not
     +    REMOTE_PORT. If REMOTE_PORT were unset, NULL would be passed to
     +    printf's %s, which is undefined behavior.
      
     -    Add a fallback string for the NULL case.
     +    Add a fallback string for the NULL case, matching the existing
     +    REMOTE_ADDR guard for consistency.
      
          Signed-off-by: Sebastien Tardif <sebtardif@ncf.ca>
      

-- 
gitgitgadget

^ permalink raw reply

* [PATCH v2 1/3] daemon: fix IPv6 address corruption in lookup_hostname()
From: Sebastien Tardif via GitGitGadget @ 2026-05-27 18:18 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Sebastien Tardif, Sebastien Tardif
In-Reply-To: <pull.2300.v2.git.git.1779905911.gitgitgadget@gmail.com>

From: Sebastien Tardif <sebtardif@ncf.ca>

getaddrinfo() is called with AF_UNSPEC hints, so it may return IPv6
results. However, the code unconditionally casts ai_addr to
sockaddr_in and passes AF_INET to inet_ntop(). On IPv6-only hosts,
this reads from the wrong struct offset, producing garbage IP
addresses.

Fix this by checking ai_family and extracting the address pointer
into a local variable before calling inet_ntop() once with the
correct family. Die on unexpected address families.

Signed-off-by: Sebastien Tardif <sebtardif@ncf.ca>
---
 daemon.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/daemon.c b/daemon.c
index 0a7b1aae44..80fa0226d8 100644
--- a/daemon.c
+++ b/daemon.c
@@ -674,9 +674,20 @@ static void lookup_hostname(struct hostinfo *hi)
 
 		gai = getaddrinfo(hi->hostname.buf, NULL, &hints, &ai);
 		if (!gai) {
-			struct sockaddr_in *sin_addr = (void *)ai->ai_addr;
+			void *addr;
+
+			if (ai->ai_family == AF_INET) {
+				struct sockaddr_in *sa = (void *)ai->ai_addr;
+				addr = &sa->sin_addr;
+			} else if (ai->ai_family == AF_INET6) {
+				struct sockaddr_in6 *sa6 = (void *)ai->ai_addr;
+				addr = &sa6->sin6_addr;
+			} else {
+				die("unexpected address family: %d",
+				    ai->ai_family);
+			}
 
-			inet_ntop(AF_INET, &sin_addr->sin_addr,
+			inet_ntop(ai->ai_family, addr,
 				  addrbuf, sizeof(addrbuf));
 			strbuf_addstr(&hi->ip_address, addrbuf);
 
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH v2 2/3] daemon: fix IPv6 address truncation in ip2str()
From: Sebastien Tardif via GitGitGadget @ 2026-05-27 18:18 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Sebastien Tardif, Sebastien Tardif
In-Reply-To: <pull.2300.v2.git.git.1779905911.gitgitgadget@gmail.com>

From: Sebastien Tardif <sebtardif@ncf.ca>

The sockaddr struct size (ai_addrlen) is passed as the output buffer
size to inet_ntop(). For IPv6, sizeof(sockaddr_in6) is 28 bytes but
INET6_ADDRSTRLEN is 46, so long IPv6 addresses are silently truncated.

Fix this by passing sizeof(ip) instead, which is the actual size of
the destination buffer. Drop the now-unused len parameter from
ip2str() and update all callers.

Signed-off-by: Sebastien Tardif <sebtardif@ncf.ca>
---
 daemon.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/daemon.c b/daemon.c
index 80fa0226d8..103c08d868 100644
--- a/daemon.c
+++ b/daemon.c
@@ -947,7 +947,7 @@ struct socketlist {
 	size_t alloc;
 };
 
-static const char *ip2str(int family, struct sockaddr *sin, socklen_t len)
+static const char *ip2str(int family, struct sockaddr *sin)
 {
 #ifdef NO_IPV6
 	static char ip[INET_ADDRSTRLEN];
@@ -958,11 +958,11 @@ static const char *ip2str(int family, struct sockaddr *sin, socklen_t len)
 	switch (family) {
 #ifndef NO_IPV6
 	case AF_INET6:
-		inet_ntop(family, &((struct sockaddr_in6*)sin)->sin6_addr, ip, len);
+		inet_ntop(family, &((struct sockaddr_in6*)sin)->sin6_addr, ip, sizeof(ip));
 		break;
 #endif
 	case AF_INET:
-		inet_ntop(family, &((struct sockaddr_in*)sin)->sin_addr, ip, len);
+		inet_ntop(family, &((struct sockaddr_in*)sin)->sin_addr, ip, sizeof(ip));
 		break;
 	default:
 		xsnprintf(ip, sizeof(ip), "<unknown>");
@@ -1019,14 +1019,14 @@ static int setup_named_sock(char *listen_addr, int listen_port, struct socketlis
 
 		if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) {
 			logerror("Could not bind to %s: %s",
-				 ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen),
+				 ip2str(ai->ai_family, ai->ai_addr),
 				 strerror(errno));
 			close(sockfd);
 			continue;	/* not fatal */
 		}
 		if (listen(sockfd, 5) < 0) {
 			logerror("Could not listen to %s: %s",
-				 ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen),
+				 ip2str(ai->ai_family, ai->ai_addr),
 				 strerror(errno));
 			close(sockfd);
 			continue;	/* not fatal */
@@ -1080,7 +1080,7 @@ static int setup_named_sock(char *listen_addr, int listen_port, struct socketlis
 
 	if ( bind(sockfd, (struct sockaddr *)&sin, sizeof sin) < 0 ) {
 		logerror("Could not bind to %s: %s",
-			 ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)),
+			 ip2str(AF_INET, (struct sockaddr *)&sin),
 			 strerror(errno));
 		close(sockfd);
 		return 0;
@@ -1088,7 +1088,7 @@ static int setup_named_sock(char *listen_addr, int listen_port, struct socketlis
 
 	if (listen(sockfd, 5) < 0) {
 		logerror("Could not listen to %s: %s",
-			 ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)),
+			 ip2str(AF_INET, (struct sockaddr *)&sin),
 			 strerror(errno));
 		close(sockfd);
 		return 0;
-- 
gitgitgadget


^ permalink raw reply related

* [PATCH v2 3/3] daemon: guard NULL REMOTE_PORT in execute() logging
From: Sebastien Tardif via GitGitGadget @ 2026-05-27 18:18 UTC (permalink / raw)
  To: git; +Cc: Patrick Steinhardt, Sebastien Tardif, Sebastien Tardif
In-Reply-To: <pull.2300.v2.git.git.1779905911.gitgitgadget@gmail.com>

From: Sebastien Tardif <sebtardif@ncf.ca>

REMOTE_ADDR and REMOTE_PORT are both set by the same code path in
handle(), so neither should be NULL independently. However, the
existing code checks REMOTE_ADDR before the loginfo() call but not
REMOTE_PORT. If REMOTE_PORT were unset, NULL would be passed to
printf's %s, which is undefined behavior.

Add a fallback string for the NULL case, matching the existing
REMOTE_ADDR guard for consistency.

Signed-off-by: Sebastien Tardif <sebtardif@ncf.ca>
---
 daemon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/daemon.c b/daemon.c
index 103c08d868..78cca8673f 100644
--- a/daemon.c
+++ b/daemon.c
@@ -753,7 +753,7 @@ static int execute(void)
 	struct strvec env = STRVEC_INIT;
 
 	if (addr)
-		loginfo("Connection from %s:%s", addr, port);
+		loginfo("Connection from %s:%s", addr, port ? port : "?");
 
 	set_keep_alive(0);
 	alarm(init_timeout ? init_timeout : timeout);
-- 
gitgitgadget

^ permalink raw reply related

* Re: [PATCH 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
From: Taylor Blau @ 2026-05-27 19:24 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
In-Reply-To: <20260527102534.GH981444@coredump.intra.peff.net>

On Wed, May 27, 2026 at 06:25:34AM -0400, Jeff King wrote:
> > It struggles, however, to efficiently generate pseudo-merge bitmaps.
> > Unlike ordinary commits for which the above algorithm is designed,
> > pseudo-merges don't represent any "real" commit in history, just a
> > grouping of non-bitmapped reference tips. In that sense, their first
> > parent is just a part of a larger set, and treating them like ordinary
> > selected commits imposes a significant slow-down when generating bitmaps
> > with pseudo-merges enabled.
>
> This is a great explanation of the problem, and especially this:
>
> > In other words, we pay a nearly ~5 minute penalty to generate
> > pseudo-merge bitmaps, but only save ~50 seconds during traversal.
>
> makes it clear that we're doing something sub-optimal. And it points us
> in the right direction, since that traversal should be able to generate
> the pseudo-merge bitmap we need in the first place! So that should be
> our goal to work towards.
>
> > Instead, build the regular selected commit bitmaps first, considering
> > only non-pseudo-merge commits in `bitmap_builder_init()`. Once those
> > bitmaps have been stored, build each pseudo-merge bitmap separately and
> > attach its parent and object bitmaps to the corresponding pseudo-merge
> > entry before writing the extension.
>
> And then this solution follows naturally from the earlier explanations.
> Good.

Thanks. For as clear as this sounds now, finding this approach took me
longer than I'd like to admit. I'm satisfied, however, with the result.

> In some ways this goes back to the pre-v2.31 way of generating bitmaps,
> which is to just traverse for each bitmap independently. But as you
> note, the whole idea of pseudo-merge bitmaps is that they aren't
> overlapping in any meaningful way. So doing one fill-in traversal per
> pseudo-merge makes sense, and hopefully we hit enough real bitmaps that
> it's not too costly.

Exactly!

> > As a result, the overhead cost for generating pseudo-merges in the above
> > configuration is much smaller:
> >
> >     +------------------+-----------------+---------------+-------------------+
> >     |                  | no pseudo-merge | pseudo-merges | Delta             |
> >     |                  |                 | (HEAD)        |                   |
> >     +------------------+-----------------+---------------+-------------------+
> >     | elapsed          |   294.1 s       |   328.4 s     |  +34.3 s (+11.7%) |
> >     | cycles           | 1,365.5 B       | 1,529.3 B     | +163.7 B (+12.0%) |
> >     | instructions     | 1,389.8 B       | 1,552.8 B     | +163.0 B (+11.7%) |
> >     | CPI              |     0.983       |     0.985     |  +0.002   (+0.2%) |
> >     +------------------+-----------------+---------------+-------------------+
>
> Nice. The time savings are going to depend on how many pseudo-merges we
> generate, I think. And I'd guess that the numbers above come from making
> one big pseudo-merge bitmap, per the config you showed earlier. But you
> probably only want a handful of them in any repo, so hopefully it
> doesn't scale _too_ badly.

That's right, though see below for more thoughts on scaling...

> > Recall that at the start of this series, generating reachability bitmaps
> > took 612.5 seconds *without* pseudo-merges. With this commit, it is
> > still ~46.38% *faster* to generate reachability bitmaps *with*
> > pseudo-merges than it was to generate bitmaps without them at the
> > beginning of this series.
>
> Sure, though 612.5 seconds is all in the distant past. We only care
> about 294.1 seconds now. ;)

Heh ;-). Naturally, I agree here, but wanted to include it for context.
I wanted to point out that the accumulated changes in this series make
it cheaper to generate bitmaps with pseudo-merges now than it was to
generate bitmaps without them before.

> More seriously, I do think the interesting question here is how the time
> scales for various pseudo-merge configurations. I don't know if we have
> any real operational experience with them yet. The original idea is that
> you might slice up the ref space into a few chunks. I'd guess that the
> old code performed badly-ish overall, but the time did not grow all that
> much as you increased the number of chunks. But with the new code, I
> suspect that the cost grows more linearly with number of chunks. That's
> just a guess, though.

I'm not aware of any large-scale deployments of pseudo-merge bitmaps.
This series is written (in part) in the hopes of making one ;-). I think
your intuition on the old code matches my own.

Below are some numbers that give you a sense of how the runtime scales
with the number of pseudo-merges. I'm relying exclusively on "stable"
pseudo-merges here since they have more predictable bucketing behavior,
though note that there isn't an exact way to dial in the number of these
so-called "stable" pseudo-merge groups. We can only control their *size*
(in terms of number of parents), so I ran the harness which produced the
above numbers with powers of 10 between [10^3, 10^6].
above code with powers of 10 between [10^3, 10^6].

Results are as follows:

    +------------+-------+----------+
    | stableSize | count | time (s) |
    +------------+-------+----------+
    |    1000000 |     1 |   34.963 |
    |     100000 |     3 |   36.954 |
    |      10000 |    26 |  221.963 |
    |       1000 |   252 | 2779.373 |
    +------------+-------+----------+

Which scales roughly like O(n^1.165) (the best fit function I could find
was t(n) = 25.18 + 4.386 * n^1.165, where 'n' is the number of
pseudo-merges, and t(n) is the time it took to generate them).

So it does grow faster than linearly, but it's not too bad. The jump
from 26 to 252 pseudo-merges is pretty significant, but having
that many pseudo-merges is probably not something that we would want to
do in practice.

> The other thing we hope for with pseudo-merges is that the chunks are
> selected such that most of the chunks don't change (because they are
> composed of old, stable refs). So in subsequent bitmap generations, we
> can reuse them either verbatim or as a starting point (if there
> were only additions). But all of that is going to be heuristic and
> depend on your config, the changes the repo sees over time, and so on.
>
> So I don't know if we'd really have good numbers on that.

We don't, and it is somewhat of a pain to simulate. I think the proof
will be in the pudding, so to speak.

> > Now that we have decoupled how we generate pseudo-merges from their
> > representation, the following commits will improve the API around
> > specifying pseudo-merge groupings during bitmap generation.
>
> I think we're at patch 8/8 here. I guess you have more to come
> eventually, but for now this part is just misleading. ;)

Yeah, I cleaved this off of a larger series to make the pseudo-merge API
a little easier to reason about and less clunky to use. But I ended up
hoarding some of those patches, and apparently forgot to adjust the
message here. Thanks for spotting.

Thanks,
Taylor

^ permalink raw reply

* [PATCH v2 0/8] pack-bitmap-write: speed up bitmap generation
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In-Reply-To: <cover.1779207127.git.me@ttaylorr.com>

Here is a reroll of my series to improve the performance of reachability
bitmap generation, focusing on very large repositories and the penalty
to generate pseudo-merge reachability bitmaps.

The series is largely unchanged since last time. Notable changes in this
round include:

 - minor refactoring in the pair of patches which consolidate the
   `find_object_pos()` success path and introduce the object position
   cache during bitmap fills, and

 - dropping a stale paragraph from the final patch's message, which
   described follow-up commits that are no longer part of this series.

As usual, a range-diff against v1 is included below for convenience.

Thanks in advance for your review!

Taylor Blau (8):
  pack-bitmap: pass object position to `fill_bitmap_tree()`
  pack-bitmap: check subtree bits before recursing
  pack-bitmap: reuse stored selected bitmaps
  pack-bitmap: consolidate `find_object_pos()` success path
  pack-bitmap: cache object positions during fill
  pack-bitmap: sort bitmaps before XORing
  pack-bitmap: remember pseudo-merge parents
  pack-bitmap: build pseudo-merge bitmaps after regular bitmaps

 pack-bitmap-write.c | 431 +++++++++++++++++++++++++++++++++++++-------
 pack-bitmap.h       |   7 +
 2 files changed, 377 insertions(+), 61 deletions(-)

Range-diff against v1:
1:  13191c19b91 = 1:  ad025810ab3 pack-bitmap: pass object position to `fill_bitmap_tree()`
2:  7d6d1cec0dd = 2:  59da63d0330 pack-bitmap: check subtree bits before recursing
3:  6e1f6bef5f6 = 3:  f13d65c0ad9 pack-bitmap: reuse stored selected bitmaps
4:  c9a56066094 ! 4:  856aa3a6ab7 pack-bitmap: consolidate `find_object_pos()` success path
    @@ Commit message
         Signed-off-by: Taylor Blau <me@ttaylorr.com>
     
      ## pack-bitmap-write.c ##
    +@@ pack-bitmap-write.c: static uint32_t find_object_pos(struct bitmap_writer *writer,
    + 				const struct object_id *oid, int *found)
    + {
    + 	struct object_entry *entry;
    ++	uint32_t pos;
    + 
    + 	entry = packlist_find(writer->to_pack, oid);
    + 	if (entry) {
     @@ pack-bitmap-write.c: static uint32_t find_object_pos(struct bitmap_writer *writer,
      		if (writer->midx)
      			base_objects = writer->midx->num_objects +
5:  e43ef6a42d1 ! 5:  70dfa80d543 pack-bitmap: cache object positions during fill
    @@ pack-bitmap-write.c: void bitmap_writer_push_commit(struct bitmap_writer *writer
      				const struct object_id *oid, int *found)
      {
      	struct object_entry *entry;
    -+	uint32_t pos;
    -+
    + 	uint32_t pos;
    + 
     +	bitmap_writer_init_pos_cache(writer);
     +
     +	if (find_cached_object_pos(writer, oid, &pos)) {
    @@ pack-bitmap-write.c: void bitmap_writer_push_commit(struct bitmap_writer *writer
     +			*found = 1;
     +		return pos;
     +	}
    - 
    ++
      	entry = packlist_find(writer->to_pack, oid);
      	if (entry) {
      		uint32_t base_objects = 0;
6:  b0a4f31353a = 6:  b1184792d23 pack-bitmap: sort bitmaps before XORing
7:  0bd88e6a096 = 7:  673b6262911 pack-bitmap: remember pseudo-merge parents
8:  30ce254312c ! 8:  8722242f1bb pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
    @@ Commit message
         portion of history reachable by one or more pseudo-merge(s), but not by
         any non-pseudo-merge commit selected for bitmapping.
     
    -    Now that we have decoupled how we generate pseudo-merges from their
    -    representation, the following commits will improve the API around
    -    specifying pseudo-merge groupings during bitmap generation.
    -
         Signed-off-by: Taylor Blau <me@ttaylorr.com>
     
      ## pack-bitmap-write.c ##

base-commit: c3d7ca7d982efc3a848fd85f34e867cfc0a99479
-- 
2.54.0.rc1.84.g1cf18622df7

^ permalink raw reply

* [PATCH v2 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()`
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In-Reply-To: <cover.1779911733.git.me@ttaylorr.com>

In the following commit, callers of `fill_bitmap_tree()` will be
required to check the bit corresponding to their tree before calling
that function. That change will reduce the overhead of setting up and
tearing down stack frames for trees whose bits are already set.

To prepare for that change, have callers pass in the tree's bit position
in `fill_bitmap_tree()`, which will make the next commit easier to read.

In the meantime, this change has a surprising and measurable benefit
during bitmap generation, particularly on very large repositories.

When processing sub-trees within `fill_bitmap_tree()`, the preimage of
this patch did the following:

    while (tree_entry(&desc, entry)) {
        switch (object_type(entry.mode)) {
        case OBJ_TREE:
            if (fill_bitmap_tree(writer, bitmap,
                                 lookup_tree(writer->repo,
                                             &entry.oid)) < 0) {
                /* ... */
            }
            /* ... */
        }
    }

, first performing the object lookup via `lookup_tree()`, and then
locating its bit position within the recursive call. This patch
effectively reorders those two calls so that we first discover the
sub-tree's bit position, *then* load its tree.

By reordering these two operations, we spend fewer CPU cycles per
instruction, likely due to improved CPU dependency/cache/pipeline
behavior. Comparing the results of running `perf stat` before and after
this commit, we have:

    +--------------+-------------+-------------+-------------------+
    |              | HEAD^       | HEAD        | Delta             |
    +--------------+-------------+-------------+-------------------+
    | elapsed      |   612.5 s   |   582.4 s   |  -30.1 s  (-4.9%) |
    | cycles       | 2,857.3 B   | 2,713.3 B   | -144.0 B  (-5.0%) |
    | instructions | 2,413.2 B   | 2,415.5 B   |   +2.3 B  (+0.1%) |
    | CPI          |     1.184   |     1.123   |  -0.061   (-5.1%) |
    +--------------+-------------+-------------+-------------------+

In a large repository with ~4.8M commits and ~37.1M tree objects, this
change improves timing from ~612.5 seconds down to ~582.4 seconds, or a
~4.9% improvement. More importantly, the number of CPU cycles spent
dropped off significantly as a result of this commit, lowering our
cycles-per-instruction ratio by about 5.1%.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 1c8070f99c0..2d5ff8fd406 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -456,10 +456,10 @@ static void bitmap_builder_clear(struct bitmap_builder *bb)
 
 static int fill_bitmap_tree(struct bitmap_writer *writer,
 			    struct bitmap *bitmap,
-			    struct tree *tree)
+			    struct tree *tree,
+			    uint32_t pos)
 {
 	int found;
-	uint32_t pos;
 	struct tree_desc desc;
 	struct name_entry entry;
 
@@ -467,9 +467,6 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
 	 * If our bit is already set, then there is nothing to do. Both this
 	 * tree and all of its children will be set.
 	 */
-	pos = find_object_pos(writer, &tree->object.oid, &found);
-	if (!found)
-		return -1;
 	if (bitmap_get(bitmap, pos))
 		return 0;
 	bitmap_set(bitmap, pos);
@@ -482,8 +479,12 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
 	while (tree_entry(&desc, &entry)) {
 		switch (object_type(entry.mode)) {
 		case OBJ_TREE:
+			pos = find_object_pos(writer, &entry.oid, &found);
+			if (!found)
+				return -1;
 			if (fill_bitmap_tree(writer, bitmap,
-					     lookup_tree(writer->repo, &entry.oid)) < 0)
+					     lookup_tree(writer->repo,
+							 &entry.oid), pos) < 0)
 				return -1;
 			break;
 		case OBJ_BLOB:
@@ -575,8 +576,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
 	}
 
 	while (tree_queue->nr) {
-		if (fill_bitmap_tree(writer, ent->bitmap,
-				     prio_queue_get(tree_queue)) < 0)
+		struct tree *t = prio_queue_get(tree_queue);
+		int found;
+
+		pos = find_object_pos(writer, &t->object.oid, &found);
+		if (!found)
+			return -1;
+
+		if (fill_bitmap_tree(writer, ent->bitmap, t, pos) < 0)
 			return -1;
 	}
 	return 0;
-- 
2.54.0.rc1.84.g1cf18622df7


^ permalink raw reply related

* [PATCH v2 2/8] pack-bitmap: check subtree bits before recursing
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In-Reply-To: <cover.1779911733.git.me@ttaylorr.com>

In the previous commit, we adjusted the callers of `fill_bitmap_tree()`
to pass in the bit position of the tree they wish to fill.

This commit makes use of that information at the call site to avoid
setting up a stack frame for fill_bitmap_tree() entirely whenever a
tree's bit position is already set.

Since this is such a hot path, the avoided cost of setting up and
tearing down stack frames for each noop'd call to `fill_bitmap_tree()`
is significant:

    +--------------+-------------+-------------+-------------------+
    |              | HEAD^       | HEAD        | Delta             |
    +--------------+-------------+-------------+-------------------+
    | elapsed      |   582.4 s   |   562.8 s   |  -19.6 s  (-3.4%) |
    | cycles       | 2,713.3 B   | 2,621.3 B   |  -92.0 B  (-3.4%) |
    | instructions | 2,415.5 B   | 2,348.9 B   |  -66.6 B  (-2.8%) |
    | CPI          |     1.123   |     1.116   |  -0.007   (-0.7%) |
    +--------------+-------------+-------------+-------------------+

In the same repository as in the previous commit, our timings dropped
from ~582.4 seconds down to ~562.77 seconds.

While the cycles-per-instruction ratio is basically unchanged, we
execute significantly fewer instructions, and correspondingly fewer
cycles.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 2d5ff8fd406..72610397020 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -463,12 +463,6 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
 	struct tree_desc desc;
 	struct name_entry entry;
 
-	/*
-	 * If our bit is already set, then there is nothing to do. Both this
-	 * tree and all of its children will be set.
-	 */
-	if (bitmap_get(bitmap, pos))
-		return 0;
 	bitmap_set(bitmap, pos);
 
 	if (repo_parse_tree(writer->repo, tree) < 0)
@@ -482,6 +476,15 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
 			pos = find_object_pos(writer, &entry.oid, &found);
 			if (!found)
 				return -1;
+			if (bitmap_get(bitmap, pos)) {
+				/*
+				 * If our bit is already set, then there
+				 * is nothing to do. Both this tree and
+				 * all of its children will be set.
+				 */
+				break;
+			}
+
 			if (fill_bitmap_tree(writer, bitmap,
 					     lookup_tree(writer->repo,
 							 &entry.oid), pos) < 0)
@@ -582,6 +585,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
 		pos = find_object_pos(writer, &t->object.oid, &found);
 		if (!found)
 			return -1;
+		if (bitmap_get(ent->bitmap, pos)) {
+			/*
+			 * If our bit is already set, then there is
+			 * nothing to do. Both this tree and all of its
+			 * children will be set.
+			 */
+			continue;
+		}
 
 		if (fill_bitmap_tree(writer, ent->bitmap, t, pos) < 0)
 			return -1;
-- 
2.54.0.rc1.84.g1cf18622df7


^ permalink raw reply related

* [PATCH v2 3/8] pack-bitmap: reuse stored selected bitmaps
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In-Reply-To: <cover.1779911733.git.me@ttaylorr.com>

When `fill_bitmap_commit()` reaches an ancestor that was selected for
its own bitmap and processed earlier, its object closure is already
stored in `writer->bitmaps` as an EWAH bitmap. As a result, walking
through that commit's tree and parents again is redundant.

Teach `fill_bitmap_commit()` to notice that case. For non-root commits in
the walk, look for a stored selected bitmap and OR it into the bitmap
being built. If one exists, skip the commit, its tree, and its parents.

Building bitmaps from scratch on the same test repository from the
previous commits yields a significant speed-up:

    +------------------+-------------+-------------+---------------------+
    |                  | HEAD^       | HEAD        | Delta               |
    +------------------+-------------+-------------+---------------------+
    | elapsed          |   562.8 s   |   324.8 s   |   -237.9 s (-42.3%) |
    | cycles           | 2,621.3 B   | 1,508.6 B   | -1,112.7 B (-42.4%) |
    | instructions     | 2,348.9 B   | 1,436.6 B   |   -912.3 B (-38.8%) |
    | CPI              |     1.116   |     1.050   |   -0.066    (-5.9%) |
    +------------------+-------------+-------------+---------------------+

In our testing repository, there are 1,261 commits selected for bitmap
coverage, and 1,382 maximal commits induced as a result of that. Of the
1,382 calls made to `fill_bitmap_commit()` (one per maximal commit), 131
of them can be short-circuited at some point during their traversal as a
consequence of this change.

In large repositories where the cost of filling the bitmap for any
individual commit is large, being able to short-circuit even ~9.5% of
the calls to `fill_bitmap_commit()` results in a significant savings.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 72610397020..651ad467469 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -509,6 +509,9 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
 static int reused_bitmaps_nr;
 static int reused_pseudo_merge_bitmaps_nr;
 
+static int fill_bitmap_commit_calls_nr;
+static int fill_bitmap_commit_found_ancestor_nr;
+
 static int fill_bitmap_commit(struct bitmap_writer *writer,
 			      struct bb_commit *ent,
 			      struct commit *commit,
@@ -519,6 +522,9 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
 {
 	int found;
 	uint32_t pos;
+
+	fill_bitmap_commit_calls_nr++;
+
 	if (!ent->bitmap)
 		ent->bitmap = bitmap_new();
 
@@ -553,6 +559,28 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
 			bitmap_free(remapped);
 		}
 
+		/*
+		 * If we encounter an ancestor for which we have already
+		 * computed a bitmap during this build (i.e. a regular
+		 * selected commit processed earlier in topo order), we can
+		 * short-circuit the walk: its stored bitmap already covers
+		 * the commit itself, its tree, and all of its ancestors.
+		 */
+		if (c != commit) {
+			khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
+							   c->object.oid);
+			if (hash_pos != kh_end(writer->bitmaps)) {
+				struct bitmapped_commit *stored =
+					kh_value(writer->bitmaps, hash_pos);
+				if (stored && stored->bitmap) {
+					fill_bitmap_commit_found_ancestor_nr++;
+					bitmap_or_ewah(ent->bitmap,
+						       stored->bitmap);
+					continue;
+				}
+			}
+		}
+
 		/*
 		 * Mark ourselves and queue our tree. The commit
 		 * walk ensures we cover all parents.
@@ -692,6 +720,12 @@ int bitmap_writer_build(struct bitmap_writer *writer)
 	trace2_data_intmax("pack-bitmap-write", writer->repo,
 			   "building_bitmaps_pseudo_merge_reused",
 			   reused_pseudo_merge_bitmaps_nr);
+	trace2_data_intmax("pack-bitmap-write", writer->repo,
+			   "fill_bitmap_commit_calls_nr",
+			   fill_bitmap_commit_calls_nr);
+	trace2_data_intmax("pack-bitmap-write", writer->repo,
+			   "fill_bitmap_commit_found_ancestor_nr",
+			   fill_bitmap_commit_found_ancestor_nr);
 
 	stop_progress(&writer->progress);
 
-- 
2.54.0.rc1.84.g1cf18622df7


^ permalink raw reply related

* [PATCH v2 4/8] pack-bitmap: consolidate `find_object_pos()` success path
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In-Reply-To: <cover.1779911733.git.me@ttaylorr.com>

Both sides of `find_object_pos()` report success in the same way: by
setting the optional `found` out-parameter and returning the resolved
bitmap position.

Prepare for adding more bookkeeping around object-position lookups by
storing the result in a local `pos` variable and sharing the success
return path between the packlist and MIDX cases.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 651ad467469..42ed22feacc 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -217,6 +217,7 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
 				const struct object_id *oid, int *found)
 {
 	struct object_entry *entry;
+	uint32_t pos;
 
 	entry = packlist_find(writer->to_pack, oid);
 	if (entry) {
@@ -224,23 +225,22 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
 		if (writer->midx)
 			base_objects = writer->midx->num_objects +
 				writer->midx->num_objects_in_base;
-
-		if (found)
-			*found = 1;
-		return oe_in_pack_pos(writer->to_pack, entry) + base_objects;
+		pos = oe_in_pack_pos(writer->to_pack, entry) + base_objects;
 	} else if (writer->midx) {
-		uint32_t at, pos;
+		uint32_t at;
 
 		if (!bsearch_midx(oid, writer->midx, &at))
 			goto missing;
 		if (midx_to_pack_pos(writer->midx, at, &pos) < 0)
 			goto missing;
-
-		if (found)
-			*found = 1;
-		return pos;
+	} else {
+		goto missing;
 	}
 
+	if (found)
+		*found = 1;
+	return pos;
+
 missing:
 	if (found)
 		*found = 0;
-- 
2.54.0.rc1.84.g1cf18622df7


^ permalink raw reply related

* [PATCH v2 5/8] pack-bitmap: cache object positions during fill
From: Taylor Blau @ 2026-05-27 19:56 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In-Reply-To: <cover.1779911733.git.me@ttaylorr.com>

The previous commits removed some redundant work from bitmap generation
by avoiding unnecessary tree recursion and by reusing selected bitmaps
that have already been computed.

Even with those changes in place, there is still an extremely hot path
from `fill_bitmap_commit()` and `fill_bitmap_tree()` to translate object
IDs into their corresponding bit positions in order to generate their
bitmaps.

In a small repository, this overhead is not significant. However, in a
very large repository (e.g., the one that we have been using as a
benchmark over the past several commits with ~57M total objects), the
overhead of locating object bit positions (often repeatedly) adds up
significantly.

Combat this by adding a small, direct-mapped cache to the bitmap writer
which maps object IDs to their corresponding bit positions. Size the
cache according to the number of objects being written, with fixed lower
and upper bounds so small repositories do not pay for a large table and
large repositories can avoid most repeated packlist and MIDX lookups.

On my machine with (a somewhat outdated) GCC 15.2.0, each entry in the
cache is 40 bytes wide:

    $ pahole -C bitmap_pos_cache_entry pack-bitmap-write.o
    struct bitmap_pos_cache_entry {
            struct object_id           oid;                  /*     0    36 */
            uint32_t                   pos;                  /*    36     4 */

            /* size: 40, cachelines: 1, members: 2 */
            /* last cacheline: 40 bytes */
    };

, and we will allocate up to 2^21 entries for a maximum total of 80 MiB
of cache overhead.

In our example repository from above and in earlier commits, this
results in a ~9.4% reduction in runtime relative to the previous commit:

    +------------------+-------------+-------------+---------------------+
    |                  | HEAD^       | HEAD        | Delta               |
    +------------------+-------------+-------------+---------------------+
    | elapsed          |   324.8 s   |   294.1 s   |    -30.7 s  (-9.4%) |
    | cycles           | 1,508.6 B   | 1,365.5 B   |   -143.0 B  (-9.5%) |
    | instructions     | 1,436.6 B   | 1,389.8 B   |    -46.9 B  (-3.3%) |
    | CPI              |     1.050   |     0.983   |   -0.068    (-6.4%) |
    +------------------+-------------+-------------+---------------------+

When generating bitmaps on this repository (to produce the above
timings), the cache grew to its maximum size of 80 MiB, and resulted in
1.024B cache hits and 59.957M cache misses.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c | 88 ++++++++++++++++++++++++++++++++++++++++++++-
 pack-bitmap.h       |  7 ++++
 2 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 42ed22feacc..4b6fb07edd7 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -89,6 +89,7 @@ void bitmap_writer_free(struct bitmap_writer *writer)
 	ewah_free(writer->tags);
 
 	kh_destroy_oid_map(writer->bitmaps);
+	free(writer->pos_cache);
 
 	kh_foreach_value(writer->pseudo_merge_commits, idx,
 			 free_pseudo_merge_commit_idx(idx));
@@ -213,15 +214,92 @@ void bitmap_writer_push_commit(struct bitmap_writer *writer,
 	writer->selected_nr++;
 }
 
+struct bitmap_pos_cache_entry {
+	struct object_id oid;
+	uint32_t pos;
+};
+
+#define BITMAP_POS_MIN_CACHE_SIZE (1U << 10)
+#define BITMAP_POS_MAX_CACHE_SIZE (1U << 21)
+#define BITMAP_POS_CACHE_VALID    (1U << 31)
+
+static void bitmap_writer_init_pos_cache(struct bitmap_writer *writer)
+{
+	if (writer->pos_cache)
+		return;
+
+	writer->pos_cache_nr = BITMAP_POS_MIN_CACHE_SIZE;
+
+	while (writer->pos_cache_nr < writer->to_pack->nr_objects &&
+	       writer->pos_cache_nr < BITMAP_POS_MAX_CACHE_SIZE)
+		writer->pos_cache_nr <<= 1;
+
+	CALLOC_ARRAY(writer->pos_cache, writer->pos_cache_nr);
+}
+
+static size_t bitmap_writer_pos_cache_slot(struct bitmap_writer *writer,
+					   const struct object_id *oid)
+{
+	return oidhash(oid) & (writer->pos_cache_nr - 1);
+}
+
+static bool bitmap_writer_pos_cache_valid(struct bitmap_writer *writer,
+					  size_t slot)
+{
+	return !!(writer->pos_cache[slot].pos & BITMAP_POS_CACHE_VALID);
+}
+
+static int find_cached_object_pos(struct bitmap_writer *writer,
+				  const struct object_id *oid, uint32_t *pos)
+{
+	size_t slot = bitmap_writer_pos_cache_slot(writer, oid);
+
+	if (bitmap_writer_pos_cache_valid(writer, slot) &&
+	    oideq(&writer->pos_cache[slot].oid, oid)) {
+		writer->pos_cache_hits++;
+		*pos = writer->pos_cache[slot].pos & ~BITMAP_POS_CACHE_VALID;
+		return 1;
+	}
+
+	writer->pos_cache_misses++;
+	return 0;
+}
+
+static uint32_t store_cached_object_pos(struct bitmap_writer *writer,
+					const struct object_id *oid,
+					uint32_t pos)
+{
+	size_t slot;
+
+	if (pos & BITMAP_POS_CACHE_VALID)
+		return pos; /* too large to cache */
+
+	slot = bitmap_writer_pos_cache_slot(writer, oid);
+
+	oidcpy(&writer->pos_cache[slot].oid, oid);
+	writer->pos_cache[slot].pos = pos | BITMAP_POS_CACHE_VALID;
+
+	return pos;
+}
+
 static uint32_t find_object_pos(struct bitmap_writer *writer,
 				const struct object_id *oid, int *found)
 {
 	struct object_entry *entry;
 	uint32_t pos;
 
+	bitmap_writer_init_pos_cache(writer);
+
+	if (find_cached_object_pos(writer, oid, &pos)) {
+		if (found)
+			*found = 1;
+		return pos;
+	}
+
 	entry = packlist_find(writer->to_pack, oid);
 	if (entry) {
 		uint32_t base_objects = 0;
+
 		if (writer->midx)
 			base_objects = writer->midx->num_objects +
 				writer->midx->num_objects_in_base;
@@ -239,7 +317,7 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
 
 	if (found)
 		*found = 1;
-	return pos;
+	return store_cached_object_pos(writer, oid, pos);
 
 missing:
 	if (found)
@@ -662,6 +740,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
 		writer->progress = start_progress(writer->repo,
 						  "Building bitmaps",
 						  writer->selected_nr);
+
+	writer->pos_cache_hits = 0;
+	writer->pos_cache_misses = 0;
+
 	trace2_region_enter("pack-bitmap-write", "building_bitmaps_total",
 			    writer->repo);
 
@@ -726,6 +808,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
 	trace2_data_intmax("pack-bitmap-write", writer->repo,
 			   "fill_bitmap_commit_found_ancestor_nr",
 			   fill_bitmap_commit_found_ancestor_nr);
+	trace2_data_intmax("pack-bitmap-write", writer->repo,
+			   "bitmap_pos_cache_hits", writer->pos_cache_hits);
+	trace2_data_intmax("pack-bitmap-write", writer->repo,
+			   "bitmap_pos_cache_misses", writer->pos_cache_misses);
 
 	stop_progress(&writer->progress);
 
diff --git a/pack-bitmap.h b/pack-bitmap.h
index a95e1c2d115..19a86554579 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -132,6 +132,8 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_i
 
 off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *);
 
+struct bitmap_pos_cache_entry;
+
 struct bitmap_writer {
 	struct repository *repo;
 	struct ewah_bitmap *commits;
@@ -143,6 +145,11 @@ struct bitmap_writer {
 	struct packing_data *to_pack;
 	struct multi_pack_index *midx; /* if appending to a MIDX chain */
 
+	struct bitmap_pos_cache_entry *pos_cache;
+	size_t pos_cache_nr;
+	uint64_t pos_cache_hits;
+	uint64_t pos_cache_misses;
+
 	struct bitmapped_commit *selected;
 	unsigned int selected_nr, selected_alloc;
 
-- 
2.54.0.rc1.84.g1cf18622df7


^ permalink raw reply related

* [PATCH v2 6/8] pack-bitmap: sort bitmaps before XORing
From: Taylor Blau @ 2026-05-27 19:56 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In-Reply-To: <cover.1779911733.git.me@ttaylorr.com>

Reachability bitmaps may be stored as XORs against nearby bitmaps, up to
10 away. However, when callers provide selected commits in an arbitrary
order, the writer may miss good ancestor/descendant pairs and produce
much larger bitmap files without changing query coverage.

Sort the selected bitmaps in date order (from oldest to newest) before
computing XOR offsets, leaving pseudo-merge bitmaps alone (which we will
deal with separately in following commits).

On our same testing repository from previous commits, this change shrunk
our selection of 1,261 bitmaps from ~635.46 MiB to 176.4 MiB for a
~72.24% reduction in the on-disk size of our *.bitmap file. The time to
generate the smaller bitmap file decreased by ~3.69 seconds, though this
is likely mostly noise.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 4b6fb07edd7..66282ea14b5 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -327,11 +327,40 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
 	return 0;
 }
 
+static int bitmapped_commit_date_cmp(const void *_a, const void *_b)
+{
+	const struct bitmapped_commit *a = _a;
+	const struct bitmapped_commit *b = _b;
+
+	if (a->commit->date < b->commit->date)
+		return -1;
+	if (a->commit->date > b->commit->date)
+		return 1;
+	return 0;
+}
+
 static void compute_xor_offsets(struct bitmap_writer *writer)
 {
 	static const int MAX_XOR_OFFSET_SEARCH = 10;
 
 	int i, next = 0;
+	int nr = bitmap_writer_nr_selected_commits(writer);
+
+	if (nr > 1) {
+		QSORT(writer->selected, nr, bitmapped_commit_date_cmp);
+
+		for (i = 0; i < nr; i++) {
+			struct bitmapped_commit *stored = &writer->selected[i];
+			khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
+							   stored->commit->object.oid);
+
+			if (hash_pos == kh_end(writer->bitmaps))
+				BUG("selected commit missing from bitmap map: %s",
+				    oid_to_hex(&stored->commit->object.oid));
+
+			kh_value(writer->bitmaps, hash_pos) = stored;
+		}
+	}
 
 	while (next < writer->selected_nr) {
 		struct bitmapped_commit *stored = &writer->selected[next];
-- 
2.54.0.rc1.84.g1cf18622df7


^ permalink raw reply related

* [PATCH v2 7/8] pack-bitmap: remember pseudo-merge parents
From: Taylor Blau @ 2026-05-27 19:56 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In-Reply-To: <cover.1779911733.git.me@ttaylorr.com>

write_pseudo_merges() currently builds an array of temporary bitmaps for
the parent set of each pseudo-merge, then serializes those bitmaps later
while writing the extension.

Move those parent bitmaps onto the corresponding bitmapped_commit
entries instead. This keeps the on-disk output unchanged, but gives the
parent bitmap the same lifetime and access pattern that later changes
will use when pseudo-merge object bitmaps are built before the write
step.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 66282ea14b5..8200aed6101 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -32,6 +32,7 @@ struct bitmapped_commit {
 	struct commit *commit;
 	struct ewah_bitmap *bitmap;
 	struct ewah_bitmap *write_as;
+	struct ewah_bitmap *pseudo_merge_parents;
 	int flags;
 	int xor_offset;
 	uint32_t commit_pos;
@@ -102,6 +103,7 @@ void bitmap_writer_free(struct bitmap_writer *writer)
 		if (bc->write_as != bc->bitmap)
 			ewah_free(bc->write_as);
 		ewah_free(bc->bitmap);
+		ewah_free(bc->pseudo_merge_parents);
 	}
 	free(writer->selected);
 }
@@ -210,6 +212,7 @@ void bitmap_writer_push_commit(struct bitmap_writer *writer,
 	writer->selected[writer->selected_nr].write_as = NULL;
 	writer->selected[writer->selected_nr].flags = 0;
 	writer->selected[writer->selected_nr].pseudo_merge = pseudo_merge;
+	writer->selected[writer->selected_nr].pseudo_merge_parents = NULL;
 
 	writer->selected_nr++;
 }
@@ -1004,42 +1007,47 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
 				struct hashfile *f)
 {
 	struct oid_array commits = OID_ARRAY_INIT;
-	struct bitmap **commits_bitmap = NULL;
 	off_t *pseudo_merge_ofs = NULL;
 	off_t start, table_start, next_ext;
 
 	uint32_t base = bitmap_writer_nr_selected_commits(writer);
 	size_t i, j = 0;
 
-	CALLOC_ARRAY(commits_bitmap, writer->pseudo_merges_nr);
 	CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr);
 
 	for (i = 0; i < writer->pseudo_merges_nr; i++) {
 		struct bitmapped_commit *merge = &writer->selected[base + i];
 		struct commit_list *p;
+		struct bitmap *parents = bitmap_new();
 
 		if (!merge->pseudo_merge)
 			BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
 
-		commits_bitmap[i] = bitmap_new();
-
 		for (p = merge->commit->parents; p; p = p->next)
-			bitmap_set(commits_bitmap[i],
+			bitmap_set(parents,
 				   find_object_pos(writer, &p->item->object.oid,
 						   NULL));
+
+		merge->pseudo_merge_parents = bitmap_to_ewah(parents);
+		bitmap_free(parents);
 	}
 
 	start = hashfile_total(f);
 
 	for (i = 0; i < writer->pseudo_merges_nr; i++) {
-		struct ewah_bitmap *commits_ewah = bitmap_to_ewah(commits_bitmap[i]);
+		struct bitmapped_commit *merge = &writer->selected[base + i];
+
+		if (!merge->pseudo_merge)
+			BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
+
+		if (!merge->pseudo_merge_parents)
+			BUG("missing pseudo-merge parents bitmap for commit %s",
+			    oid_to_hex(&merge->commit->object.oid));
 
 		pseudo_merge_ofs[i] = hashfile_total(f);
 
-		dump_bitmap(f, commits_ewah);
+		dump_bitmap(f, merge->pseudo_merge_parents);
 		dump_bitmap(f, writer->selected[base+i].write_as);
-
-		ewah_free(commits_ewah);
 	}
 
 	next_ext = st_add(hashfile_total(f),
@@ -1122,12 +1130,8 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
 	hashwrite_be64(f, table_start - start);
 	hashwrite_be64(f, hashfile_total(f) - start + sizeof(uint64_t));
 
-	for (i = 0; i < writer->pseudo_merges_nr; i++)
-		bitmap_free(commits_bitmap[i]);
-
 	oid_array_clear(&commits);
 	free(pseudo_merge_ofs);
-	free(commits_bitmap);
 }
 
 static int table_cmp(const void *_va, const void *_vb, void *_data)
-- 
2.54.0.rc1.84.g1cf18622df7


^ permalink raw reply related

* [PATCH v2 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
From: Taylor Blau @ 2026-05-27 19:56 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In-Reply-To: <cover.1779911733.git.me@ttaylorr.com>

When generating bitmaps, `bitmap_builder_init()` starts with an initial
selection of commits to receive bitmap coverage, and then determines a
set of "maximal" commits based on its input.

Commit 089f751360f (pack-bitmap-write: build fewer intermediate bitmaps,
2020-12-08) has extensive details, but the gist is as follows:

Each selected commit starts with one commit_mask bit in its "commit
mask" bitmap. Then, we walk the first-parent history in topological
order and OR each commit's mask into its (first) parent. Whenever that
OR results in the parent having more bits set, the child is deemed to be
non-maximal, and the frontier is pushed further back along the first
parent history.

That approach works extremely well for ordinary selected commits, whose
first-parent histories often describe real sharing between the bitmaps
we are going to write.

It struggles, however, to efficiently generate pseudo-merge bitmaps.
Unlike ordinary commits for which the above algorithm is designed,
pseudo-merges don't represent any "real" commit in history, just a
grouping of non-bitmapped reference tips. In that sense, their first
parent is just a part of a larger set, and treating them like ordinary
selected commits imposes a significant slow-down when generating bitmaps
with pseudo-merges enabled.

When partitioning all non-bitmapped reference tips into eight
individual pseudo-merges via the following configuration:

    [bitmapPseudoMerge "all"]
        pattern=refs/
        threshold=now
        stableSize=10000000
        maxMerges=8

, the cost of generating a bitmap from scratch rises significantly:

    +------------------+-----------------+---------------+---------------------+
    |                  | no pseudo-merge | pseudo-merges | Delta               |
    |                  |                 | (HEAD^)       |                     |
    +------------------+-----------------+---------------+---------------------+
    | elapsed          |   294.1 s       |   575.0 s     |   +280.9 s (+95.5%) |
    | cycles           | 1,365.5 B       | 2,686.9 B     | +1,321.4 B (+96.8%) |
    | instructions     | 1,389.8 B       | 2,546.6 B     | +1,156.8 B (+83.2%) |
    | CPI              |     0.983       |     1.055     |   +0.073    (+7.4%) |
    +------------------+-----------------+---------------+---------------------+

This is a particularly poor trade-off, because the time saved by these
pseudo-merges during, e.g.,

    $ git rev-list --count --all --objects --use-bitmap-index

is only:

    $ hyperfine -L v true,false -n 'pseudo-merges: {v}' '
        GIT_TEST_USE_PSEUDO_MERGES={v} git.compile rev-list --count \
          --objects --all --use-bitmap-index
      '

    Benchmark 1: pseudo-merges: true
      Time (mean ± σ):      2.613 s ±  0.012 s    [User: 2.308 s, System: 0.305 s]
      Range (min … max):    2.594 s …  2.633 s    10 runs

    Benchmark 2: pseudo-merges: false
      Time (mean ± σ):     52.205 s ±  0.170 s    [User: 51.500 s, System: 0.697 s]
      Range (min … max):   51.956 s … 52.458 s    10 runs

    Summary
      pseudo-merges: true ran
       19.98 ± 0.11 times faster than pseudo-merges: false

In other words, we pay a penalty of nearly 5 minutes to generate
pseudo-merge bitmaps, but only save ~50 seconds during traversal.

The problem stems from injecting pseudo-merges into the bitmap builder
as if they were normal commits. The maximal commit selection algorithm
was simply not designed for that case, and performs predictably poorly.

The only reason we reused the maximal commit selection routine for
pseudo-merges alongside regular non-pseudo-merge commits is because we
represent them both as commit objects (where the pseudo-merge commits
just represent a made-up commit as opposed to one that actually exists
in a repository's object store).

Instead, build the regular selected commit bitmaps first, considering
only non-pseudo-merge commits in `bitmap_builder_init()`. Once those
bitmaps have been stored, build each pseudo-merge bitmap separately and
attach its parent and object bitmaps to the corresponding pseudo-merge
entry before writing the extension.

This keeps the regular bitmap build shaped like the no-pseudo-merge
case. The later pseudo-merge fill can still stop at stored selected
ancestor bitmaps, so it does not have to rewalk each pseudo-merge
closure from scratch.

When an existing bitmap has the same pseudo-merge parent set, reuse and
remap that whole pseudo-merge bitmap before falling back to
fill_bitmap_commit(). This preserves the benefit of stable pseudo-merges
while keeping the on-disk format and reader behavior unchanged.

As a result, the overhead cost for generating pseudo-merges in the above
configuration is much smaller:

    +------------------+-----------------+---------------+-------------------+
    |                  | no pseudo-merge | pseudo-merges | Delta             |
    |                  |                 | (HEAD)        |                   |
    +------------------+-----------------+---------------+-------------------+
    | elapsed          |   294.1 s       |   328.4 s     |  +34.3 s (+11.7%) |
    | cycles           | 1,365.5 B       | 1,529.3 B     | +163.7 B (+12.0%) |
    | instructions     | 1,389.8 B       | 1,552.8 B     | +163.0 B (+11.7%) |
    | CPI              |     0.983       |     0.985     |  +0.002   (+0.2%) |
    +------------------+-----------------+---------------+-------------------+

Recall that at the start of this series, generating reachability bitmaps
took 612.5 seconds *without* pseudo-merges. With this commit, it is
still ~46.38% *faster* to generate reachability bitmaps *with*
pseudo-merges than it was to generate bitmaps without them at the
beginning of this series.

The changes to implement this are mostly straightforward. We exclude
pseudo-merge commits from the existing bitmap generation, and walk over
them in a separate pass, by either reusing an existing on-disk
pseudo-merge, or passing the pseudo-merge commit itself back to the
existing routine in `fill_bitmap_commit()`.

(Note that the routine to build pseudo-merge bitmaps is the same both
before and after this change; the difference is only that we do not let
pseudo-merges participate in determining the set of maximal commits.)

The only wrinkle is that `fill_bitmap_commit()` must be taught to not
expect that all tree objects have been parsed, which is the case for any
portion of history reachable by one or more pseudo-merge(s), but not by
any non-pseudo-merge commit selected for bitmapping.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 pack-bitmap-write.c | 210 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 174 insertions(+), 36 deletions(-)

diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 8200aed6101..1bcb3f98a42 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -446,13 +446,17 @@ static void bitmap_builder_init(struct bitmap_builder *bb,
 	revs.topo_order = 1;
 	revs.first_parent_only = 1;
 
-	for (i = 0; i < writer->selected_nr; i++) {
+	for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) {
 		struct bitmapped_commit *bc = &writer->selected[i];
 		struct bb_commit *ent = bb_data_at(&bb->data, bc->commit);
 
+		if (bc->pseudo_merge)
+			BUG("unexpected pseudo-merge at %"PRIuMAX,
+			    (uintmax_t)i);
+
 		ent->selected = 1;
 		ent->maximal = 1;
-		ent->pseudo_merge = bc->pseudo_merge;
+		ent->pseudo_merge = 0;
 		ent->idx = i;
 
 		ent->commit_mask = bitmap_new();
@@ -618,6 +622,8 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
 
 static int reused_bitmaps_nr;
 static int reused_pseudo_merge_bitmaps_nr;
+static int pseudo_merge_bitmap_nr;
+static int pseudo_merge_bitmap_parents;
 
 static int fill_bitmap_commit_calls_nr;
 static int fill_bitmap_commit_found_ancestor_nr;
@@ -631,8 +637,12 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
 			      const uint32_t *mapping)
 {
 	int found;
+	int from_pseudo_merge = commit->object.flags & BITMAP_PSEUDO_MERGE;
 	uint32_t pos;
 
+	if (ent->pseudo_merge)
+		BUG("unexpected pseudo-merge commit in fill_bitmap_commit()");
+
 	fill_bitmap_commit_calls_nr++;
 
 	if (!ent->bitmap)
@@ -648,10 +658,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
 			struct ewah_bitmap *old;
 			struct bitmap *remapped = bitmap_new();
 
-			if (commit->object.flags & BITMAP_PSEUDO_MERGE)
-				old = pseudo_merge_bitmap_for_commit(old_bitmap, c);
-			else
-				old = bitmap_for_commit(old_bitmap, c);
+			old = bitmap_for_commit(old_bitmap, c);
 			/*
 			 * If this commit has an old bitmap, then translate that
 			 * bitmap and add its bits to this one. No need to walk
@@ -660,10 +667,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
 			if (old && !rebuild_bitmap(mapping, old, remapped)) {
 				bitmap_or(ent->bitmap, remapped);
 				bitmap_free(remapped);
-				if (commit->object.flags & BITMAP_PSEUDO_MERGE)
-					reused_pseudo_merge_bitmaps_nr++;
-				else
-					reused_bitmaps_nr++;
+				reused_bitmaps_nr++;
 				continue;
 			}
 			bitmap_free(remapped);
@@ -696,12 +700,32 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
 		 * walk ensures we cover all parents.
 		 */
 		if (!(c->object.flags & BITMAP_PSEUDO_MERGE)) {
+			struct tree *tree;
+
+			if (from_pseudo_merge && !c->object.parsed) {
+				/*
+				 * Commits reachable from selected
+				 * non-pseudo-merges are already parsed
+				 * by the regular bitmap build.
+				 *
+				 * However, pseudo-merge fills can also
+				 * reach commits that were not covered
+				 * there, so parse any such leftovers
+				 * before reading their tree or parents.
+				 */
+				if (repo_parse_commit(writer->repo, c))
+					return -1;
+			}
+
 			pos = find_object_pos(writer, &c->object.oid, &found);
 			if (!found)
 				return -1;
 			bitmap_set(ent->bitmap, pos);
-			prio_queue_put(tree_queue,
-				       repo_get_commit_tree(writer->repo, c));
+
+			tree = repo_get_commit_tree(writer->repo, c);
+			if (!tree)
+				return -1;
+			prio_queue_put(tree_queue, tree);
 		}
 
 		for (p = c->parents; p; p = p->next) {
@@ -738,6 +762,137 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
 	return 0;
 }
 
+static int reuse_pseudo_merge_bitmap(struct bitmap_index *old_bitmap,
+				     const uint32_t *mapping,
+				     struct commit *merge,
+				     struct ewah_bitmap **out)
+{
+	struct ewah_bitmap *old;
+	struct bitmap *remapped;
+
+	if (!old_bitmap || !mapping)
+		return 0;
+
+	old = pseudo_merge_bitmap_for_commit(old_bitmap, merge);
+	if (!old)
+		return 0;
+
+	remapped = bitmap_new();
+	if (rebuild_bitmap(mapping, old, remapped) < 0) {
+		bitmap_free(remapped);
+		return 0;
+	}
+
+	*out = bitmap_to_ewah(remapped);
+	bitmap_free(remapped);
+	reused_pseudo_merge_bitmaps_nr++;
+	return 1;
+}
+
+static int build_pseudo_merge_bitmap(struct bitmap_writer *writer,
+				     struct bitmap_index *old_bitmap,
+				     const uint32_t *mapping,
+				     struct commit *merge,
+				     struct ewah_bitmap **out)
+{
+	struct bb_commit ent = { 0 };
+	struct prio_queue queue = { NULL };
+	struct prio_queue tree_queue = { NULL };
+	unsigned parents = commit_list_count(merge->parents);
+	int ret;
+
+	ent.bitmap = bitmap_new();
+
+	pseudo_merge_bitmap_nr++;
+	pseudo_merge_bitmap_parents += parents;
+
+	if (reuse_pseudo_merge_bitmap(old_bitmap, mapping, merge, out)) {
+		ret = 0;
+		goto done;
+	}
+
+	ret = fill_bitmap_commit(writer, &ent, merge, &queue, &tree_queue,
+				 old_bitmap, mapping);
+
+	if (!ret)
+		*out = bitmap_to_ewah(ent.bitmap);
+
+done:
+	bitmap_free(ent.bitmap);
+	clear_prio_queue(&queue);
+	clear_prio_queue(&tree_queue);
+
+	return ret;
+}
+
+static int build_pseudo_merge_bitmaps(struct bitmap_writer *writer,
+				      struct bitmap_index *old_bitmap,
+				      const uint32_t *mapping,
+				      int *nr_stored)
+{
+	size_t i = bitmap_writer_nr_selected_commits(writer);
+	int ret = 0;
+
+	if (!writer->pseudo_merges_nr)
+		return 0;
+
+	trace2_region_enter("pack-bitmap-write", "building_pseudo_merge_bitmaps",
+			    writer->repo);
+
+	for (; i < writer->selected_nr; i++) {
+		struct bitmapped_commit *merge = &writer->selected[i];
+		struct commit_list *p;
+		struct bitmap *parents = bitmap_new();
+		struct ewah_bitmap *objects = NULL;
+
+		if (!merge->pseudo_merge)
+			BUG("found non-pseudo merge commit at %"PRIuMAX,
+			    (uintmax_t)i);
+
+		for (p = merge->commit->parents; p; p = p->next) {
+			int found;
+			uint32_t pos = find_object_pos(writer,
+						       &p->item->object.oid,
+						       &found);
+			if (!found) {
+				bitmap_free(parents);
+				ret = -1;
+				goto done;
+			}
+			bitmap_set(parents, pos);
+		}
+
+		merge->pseudo_merge_parents = bitmap_to_ewah(parents);
+		bitmap_free(parents);
+
+		if (build_pseudo_merge_bitmap(writer, old_bitmap, mapping,
+					      merge->commit, &objects) < 0) {
+			ret = -1;
+			goto done;
+		}
+		merge->bitmap = objects;
+
+		(*nr_stored)++;
+		display_progress(writer->progress, *nr_stored);
+	}
+
+done:
+	trace2_region_leave("pack-bitmap-write", "building_pseudo_merge_bitmaps",
+			    writer->repo);
+
+	trace2_data_intmax("pack-bitmap-write", writer->repo,
+			   "pseudo_merge_bitmap_nr",
+			   pseudo_merge_bitmap_nr);
+	trace2_data_intmax("pack-bitmap-write", writer->repo,
+			   "building_bitmaps_pseudo_merge_reused",
+			   reused_pseudo_merge_bitmaps_nr);
+	trace2_data_intmax("pack-bitmap-write", writer->repo,
+			   "pseudo_merge_bitmap_parents",
+			   pseudo_merge_bitmap_parents);
+
+	return ret;
+}
+
 static void store_selected(struct bitmap_writer *writer,
 			   struct bb_commit *ent, struct commit *commit)
 {
@@ -821,6 +976,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
 			bitmap_free(ent->bitmap);
 		ent->bitmap = NULL;
 	}
+	if (closed &&
+	    build_pseudo_merge_bitmaps(writer, old_bitmap, mapping,
+				       &nr_stored) < 0)
+		closed = 0;
 	clear_prio_queue(&queue);
 	clear_prio_queue(&tree_queue);
 	bitmap_builder_clear(&bb);
@@ -831,9 +990,6 @@ int bitmap_writer_build(struct bitmap_writer *writer)
 			    writer->repo);
 	trace2_data_intmax("pack-bitmap-write", writer->repo,
 			   "building_bitmaps_reused", reused_bitmaps_nr);
-	trace2_data_intmax("pack-bitmap-write", writer->repo,
-			   "building_bitmaps_pseudo_merge_reused",
-			   reused_pseudo_merge_bitmaps_nr);
 	trace2_data_intmax("pack-bitmap-write", writer->repo,
 			   "fill_bitmap_commit_calls_nr",
 			   fill_bitmap_commit_calls_nr);
@@ -1015,23 +1171,6 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
 
 	CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr);
 
-	for (i = 0; i < writer->pseudo_merges_nr; i++) {
-		struct bitmapped_commit *merge = &writer->selected[base + i];
-		struct commit_list *p;
-		struct bitmap *parents = bitmap_new();
-
-		if (!merge->pseudo_merge)
-			BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
-
-		for (p = merge->commit->parents; p; p = p->next)
-			bitmap_set(parents,
-				   find_object_pos(writer, &p->item->object.oid,
-						   NULL));
-
-		merge->pseudo_merge_parents = bitmap_to_ewah(parents);
-		bitmap_free(parents);
-	}
-
 	start = hashfile_total(f);
 
 	for (i = 0; i < writer->pseudo_merges_nr; i++) {
@@ -1040,14 +1179,13 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
 		if (!merge->pseudo_merge)
 			BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
 
-		if (!merge->pseudo_merge_parents)
-			BUG("missing pseudo-merge parents bitmap for commit %s",
+		if (!merge->pseudo_merge_parents || !merge->bitmap)
+			BUG("missing pseudo-merge bitmap for commit %s",
 			    oid_to_hex(&merge->commit->object.oid));
 
 		pseudo_merge_ofs[i] = hashfile_total(f);
-
 		dump_bitmap(f, merge->pseudo_merge_parents);
-		dump_bitmap(f, writer->selected[base+i].write_as);
+		dump_bitmap(f, merge->bitmap);
 	}
 
 	next_ext = st_add(hashfile_total(f),
-- 
2.54.0.rc1.84.g1cf18622df7

^ permalink raw reply related

* Re: [PATCH v2 0/3] line-log: integrate -L with the standard log output pipeline
From: D. Ben Knoble @ 2026-05-27 20:20 UTC (permalink / raw)
  To: Michael Montalbo via GitGitGadget; +Cc: git, Michael Montalbo
In-Reply-To: <pull.2094.v2.git.1779738059.gitgitgadget@gmail.com>

On Mon, May 25, 2026 at 3:41 PM Michael Montalbo via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> Since its introduction, git log -L has short-circuited from
> log_tree_commit() into its own output function, bypassing log_tree_diff()
> and log_tree_diff_flush(). This skips no_free save/restore,
> always_show_header, diff_free() cleanup, and means that pickaxe (-S, -G,
> --find-object) and --diff-filter cannot suppress commits whose pairs are all
> filtered out, because show_log() runs before diffcore_std().
>
> This series restructures the flow so that -L goes through the same
> log_tree_diff() -> log_tree_diff_flush() path as normal single-parent and
> merge diffs, then uses that to enable several non-patch diff formats.
>
> Patch 1: revision: move -L setup before output_format-to-diff derivation
>
> Preparatory reorder in setup_revisions(). The -L block sets a default
> DIFF_FORMAT_PATCH when no format is requested; move it before the derivation
> of revs->diff from output_format so the default is visible to that check. No
> behavior change on its own.
>
> Patch 2: line-log: integrate -L output with the standard log-tree pipeline
>
> Rename line_log_print() to line_log_queue_pairs(), stripping it down to only
> queue pre-computed filepairs. log_tree_diff_flush() handles show_log(),
> diffcore_std(), and diff_flush(). This fixes pickaxe and --diff-filter
> suppression, and aligns the commit/diff separator with the rest of log
> output. Rejects --full-diff, which is not yet supported when filepairs are
> pre-computed.
>
> Patch 3: line-log: allow non-patch diff formats with -L
>
> Expand the allowlist to accept --raw, --name-only, --name-status, and
> --summary. These only read filepair metadata already set by the line-log
> machinery. Diff stat formats (--stat, --numstat, --shortstat, --dirstat)
> remain blocked because they call compute_diffstat() on full blob content and
> would show whole-file statistics rather than range-scoped ones.
>
> Changes since v1:
>
>  * Patch 2: use !opt->loginfo return convention in log_tree_diff() to match
>    the existing single-parent and merge codepaths, instead of returning
>    log_tree_diff_flush() directly.
>  * Patch 2: reword the early-return removal to explicitly tie it to the
>    pipeline change.
>  * Patch 2: soften --full-diff rejection to "not yet supported".
>  * Patches 2-3: use test_grep consistently in new tests.
>  * Patch 2: replace sed | grep pipe with sed > file && test_grep for proper
>    exit status handling.
>
> Michael Montalbo (3):
>   revision: move -L setup before output_format-to-diff derivation
>   line-log: integrate -L output with the standard log-tree pipeline
>   line-log: allow non-patch diff formats with -L
>
>  Documentation/line-range-options.adoc         |  10 +-
>  line-log.c                                    |  30 ++----
>  line-log.h                                    |   2 +-
>  log-tree.c                                    |  10 +-
>  revision.c                                    |  24 +++--
>  t/t4211-line-log.sh                           | 100 +++++++++++++++---
>  t/t4211/sha1/expect.parallel-change-f-to-main |   1 -
>  .../sha256/expect.parallel-change-f-to-main   |   1 -
>  8 files changed, 121 insertions(+), 57 deletions(-)
>
>
> base-commit: 9f223ef1c026d91c7ac68cc0211bde255dda6199
> Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2094%2Fmmontalbo%2Fmm%2Fline-log-use-log-tree-diff-flush-v2
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2094/mmontalbo/mm/line-log-use-log-tree-diff-flush-v2
> Pull-Request: https://github.com/gitgitgadget/git/pull/2094
>
> Range-diff vs v1:

Looks good.

>
>  1:  9633eb62c6 = 1:  9633eb62c6 revision: move -L setup before output_format-to-diff derivation
>  2:  2d9e0ca015 ! 2:  7acfc5376e line-log: integrate -L output with the standard log-tree pipeline
>      @@ Commit message
>              log_tree_diff_flush(), mirroring the diff_tree_oid() + flush
>              pattern used by the single-parent and merge codepaths.
>
>      -     - Remove the early return in log_tree_commit() that bypassed
>      -       no_free save/restore, always_show_header, and diff_free().
>      +     - Remove the early return in log_tree_commit() that is no longer
>      +       needed now that -L output flows through log_tree_diff() and
>      +       log_tree_diff_flush(); this restores no_free save/restore,
>      +       always_show_header, and diff_free() cleanup.
>
>           Because show_log() is now deferred until after diffcore_std() inside
>           log_tree_diff_flush(), pickaxe (-S, -G, --find-object) and
>      @@ Commit message
>           log_tree_diff_flush() only emits one for verbose headers.  This
>           matches the rest of log output.
>
>      -    Also reject --full-diff, which is meaningless with -L: the filepairs
>      -    are pre-computed during the history walk and scoped to tracked paths,
>      -    so there is no tree diff to widen.
>      +    Also reject --full-diff, which is not yet supported with -L: the
>      +    filepairs are pre-computed during the history walk and scoped to
>      +    tracked line ranges, so there is currently no full-tree diff to
>      +    fall back to for display.
>
>           Update tests accordingly.
>
>      @@ log-tree.c: static int log_tree_diff(struct rev_info *opt, struct commit *commit
>
>       + if (opt->line_level_traverse) {
>       +         line_log_queue_pairs(opt, commit);
>      -+         return log_tree_diff_flush(opt);
>      ++         log_tree_diff_flush(opt);
>      ++         return !opt->loginfo;
>       + }
>       +
>         parse_commit_or_die(commit);
>      @@ log-tree.c: int log_tree_commit(struct rev_info *opt, struct commit *commit)
>
>        ## revision.c ##
>       @@ revision.c: int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
>      +          die(_("the option '%s' requires '%s'"), "--grep-reflog", "--walk-reflogs");
>      +
>         if (revs->line_level_traverse &&
>      -      (revs->diffopt.output_format & ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT)))
>      -          die(_("-L does not yet support diff formats besides -p and -s"));
>      -+ if (revs->line_level_traverse && revs->full_diff)
>      -+         die(_("-L is not compatible with --full-diff"));
>      +-     (revs->diffopt.output_format & ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT)))
>      +-         die(_("-L does not yet support diff formats besides -p and -s"));
>      ++     (revs->full_diff ||
>      ++      (revs->diffopt.output_format &
>      ++       ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT))))
>      ++         die(_("-L does not yet support the requested diff format"));
>
>         if (revs->expand_tabs_in_log < 0)
>                 revs->expand_tabs_in_log = revs->expand_tabs_in_log_default;
>      @@ t/t4211-line-log.sh: test_expect_success '-L with -G filters to diff-text matche
>       + test_cmp expect actual
>       +'
>       +
>      -+test_expect_success '--full-diff is not supported with -L' '
>      ++test_expect_success '--full-diff is not yet supported with -L' '
>       + test_must_fail git log -L1,24:b.c --full-diff 2>err &&
>      -+ test_grep "not compatible with --full-diff" err
>      ++ test_grep "does not yet support" err
>       +'
>       +
>       +test_expect_success '-L --oneline has no extra blank line before diff' '
>       + git checkout parent-oids &&
>       + git log --oneline -L:func2:file.c -1 >actual &&
>       + # Oneline header on line 1, diff starts immediately on line 2
>      -+ sed -n 2p actual | grep "^diff --git"
>      ++ sed -n 2p actual >line2 &&
>      ++ test_grep "^diff --git" line2
>       +'
>       +
>        test_done
>  3:  06c24b416f ! 3:  10a3d8dde2 line-log: allow non-patch diff formats with -L
>      @@ Documentation/line-range-options.adoc
>
>        ## revision.c ##
>       @@ revision.c: int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
>      -          die(_("the option '%s' requires '%s'"), "--grep-reflog", "--walk-reflogs");
>      -
>         if (revs->line_level_traverse &&
>      --     (revs->diffopt.output_format & ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT)))
>      --         die(_("-L does not yet support diff formats besides -p and -s"));
>      -+     (revs->diffopt.output_format &
>      -+      ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT |
>      -+        DIFF_FORMAT_RAW | DIFF_FORMAT_NAME |
>      -+        DIFF_FORMAT_NAME_STATUS | DIFF_FORMAT_SUMMARY)))
>      -+         die(_("-L does not yet support the requested diff format"));
>      -  if (revs->line_level_traverse && revs->full_diff)
>      -          die(_("-L is not compatible with --full-diff"));
>      +      (revs->full_diff ||
>      +       (revs->diffopt.output_format &
>      +-       ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT))))
>      ++       ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT |
>      ++         DIFF_FORMAT_RAW | DIFF_FORMAT_NAME |
>      ++         DIFF_FORMAT_NAME_STATUS | DIFF_FORMAT_SUMMARY))))
>      +          die(_("-L does not yet support the requested diff format"));
>
>      +  if (revs->expand_tabs_in_log < 0)
>
>        ## t/t4211-line-log.sh ##
>       @@ t/t4211-line-log.sh: test_expect_success '-p shows the default patch output' '
>      @@ t/t4211-line-log.sh: test_expect_success '-p shows the default patch output' '
>       - test_must_fail git log -L1,24:b.c --raw
>       +test_expect_success '--raw shows mode, oid, status and path' '
>       + git log -L1,24:b.c --raw --format= >actual &&
>      -+ grep "^:100644 100644 [0-9a-f]\{7\} [0-9a-f]\{7\} M     b.c$" actual &&
>      -+ ! grep "^diff --git" actual &&
>      -+ ! grep "^@@" actual
>      ++ test_grep "^:100644 100644 [0-9a-f]\{7\} [0-9a-f]\{7\} M        b.c$" actual &&
>      ++ ! test_grep "^diff --git" actual &&
>      ++ ! test_grep "^@@" actual

I wish we had docs for all the little test helpers… in particular, I
think this is supposed to be "test_grep !" ?

>       +'
>       +
>       +test_expect_success '--name-only shows path' '
>       + git log -L1,24:b.c --name-only --format= >actual &&
>      -+ grep "^b.c$" actual &&
>      -+ ! grep "^diff --git" actual &&
>      -+ ! grep "^@@" actual
>      ++ test_grep "^b.c$" actual &&
>      ++ ! test_grep "^diff --git" actual &&
>      ++ ! test_grep "^@@" actual
>       +'
>       +
>       +test_expect_success '--name-status shows status and path' '
>       + git log -L1,24:b.c --name-status --format= >actual &&
>      -+ grep "^M        b.c$" actual &&
>      -+ ! grep "^diff --git" actual &&
>      -+ ! grep "^@@" actual
>      ++ test_grep "^M   b.c$" actual &&
>      ++ ! test_grep "^diff --git" actual &&
>      ++ ! test_grep "^@@" actual
>       +'
>       +
>       +test_expect_success '--stat is not yet supported with -L' '
>      @@ t/t4211-line-log.sh: test_expect_success '-p shows the default patch output' '
>
>        test_expect_success 'setup for checking fancy rename following' '
>       @@ t/t4211-line-log.sh: test_expect_success '-L --oneline has no extra blank line before diff' '
>      -  sed -n 2p actual | grep "^diff --git"
>      +  test_grep "^diff --git" line2
>        '
>
>       +test_expect_success '--summary shows new file on root commit' '
>       + git checkout parent-oids &&
>       + git log -L:func2:file.c --summary --format= >actual &&
>      -+ grep "create mode 100644 file.c" actual
>      ++ test_grep "create mode 100644 file.c" actual
>       +'
>       +
>        test_done
>
> --
> gitgitgadget

Thanks

-- 
D. Ben Knoble

^ permalink raw reply

* Re: [PATCH v2 0/3] daemon: fix network address handling bugs
From: Junio C Hamano @ 2026-05-27 21:00 UTC (permalink / raw)
  To: Sebastien Tardif via GitGitGadget
  Cc: git, Patrick Steinhardt, Sebastien Tardif
In-Reply-To: <pull.2300.v2.git.git.1779905911.gitgitgadget@gmail.com>

"Sebastien Tardif via GitGitGadget" <gitgitgadget@gmail.com> writes:

> Fix three related issues in daemon.c's network address handling:
>
> IPv6 address corruption in lookup_hostname(): getaddrinfo() is called with
> AF_UNSPEC hints, so it may return IPv6 results. However, the code
> unconditionally casts ai_addr to sockaddr_in and passes AF_INET to
> inet_ntop(). On IPv6-only hosts, this reads from the wrong struct offset,
> producing garbage IP addresses. Fixed by checking ai_family and handling
> both AF_INET and AF_INET6.
>
> IPv6 address truncation in ip2str(): The sockaddr struct size (ai_addrlen)
> is passed as the output buffer size to inet_ntop(). For IPv6,
> sizeof(sockaddr_in6) is 28 bytes but INET6_ADDRSTRLEN is 46, so long IPv6
> addresses are silently truncated. Fixed by passing sizeof(ip) instead, and
> dropping the now-unused len parameter.
>
> NULL pointer in execute() logging: REMOTE_PORT environment variable is used
> in a format string without a NULL check (only REMOTE_ADDR was checked). If
> REMOTE_PORT is unset, NULL is passed to printf's %s, which is undefined
> behavior. Fixed by using a fallback string.
>
> Changes since v1:
>
>  * Split the single patch into three separate commits, one per fix, per
>    Patrick's review.

This, and all the other items in this list, are differences between
the version before v1 and v2, aren't they?  It is OK to pretend that
the pre-v1 version v0 didn't officially exist, but it would be
helpful to see the inter-version improvements for *this* version.

Indeed, range-diff tells us that the commit log improvement is the
only change since the previous iteration.

> Range-diff vs v1:
>
>  1:  b2d8143811 = 1:  b2d8143811 daemon: fix IPv6 address corruption in lookup_hostname()
>  2:  5c01ec3cad = 2:  5c01ec3cad daemon: fix IPv6 address truncation in ip2str()
>  3:  1b2f9d1a07 ! 3:  e312735716 daemon: guard NULL REMOTE_PORT in execute() logging
>      @@ Metadata
>        ## Commit message ##
>           daemon: guard NULL REMOTE_PORT in execute() logging
>       
>      -    The REMOTE_PORT environment variable is used in a format string
>      -    without a NULL check, while REMOTE_ADDR is checked. If REMOTE_PORT
>      -    is unset, NULL is passed to printf's %s, which is undefined behavior.
>      +    REMOTE_ADDR and REMOTE_PORT are both set by the same code path in
>      +    handle(), so neither should be NULL independently. However, the
>      +    existing code checks REMOTE_ADDR before the loginfo() call but not
>      +    REMOTE_PORT. If REMOTE_PORT were unset, NULL would be passed to
>      +    printf's %s, which is undefined behavior.

This is easier to read than the previous, but it is unclear what the
change is trying to achieve.  You first say if addr is set port can
never be unset.  So by checking addr before calling loginfo(), the
code effectively is ensuring that addr and port are set.  

 (1) The word "However" in "However the existing code checks" does
     not make much sense to me (I would think "Therefore" is less
     confusing, but if what you first said is correct, then it is
     quite obvious and can be left unsaid).

 (2) It is unclear why "If REMOTE_PORT were unset NULL would be ..."
     needs to be brought up.  Yes, you are not supposed to pass NULL
     to printf that expects "%s" to format it.  But isn't the whole
     point of checking that addr is not NULL because the caller
     knows that loginfo() accesses both, and the caller also knows
     that if addr is not NULL, port will never be NULL?  Or is this
     comment about something other than loginfo() where port is used
     without checking either addr or port?  Then it would not make
     much sense to bring up "addr is checked before calling
     loginfo()".

IOW, the sentence structure is vastly improved over the previous
round, but it made it clearer that what these sentences say is
unclear ;-).

>      -    Add a fallback string for the NULL case.
>      +    Add a fallback string for the NULL case, matching the existing
>      +    REMOTE_ADDR guard for consistency.

I tried to find if there is any existing case (addr ? addr : "") to
match, but I didn't find any.  Probably that is because it is not
needed (instead the code does "if (addr) ..." to protect itself).

I think the only valid justification you could give to this change
is to say that even though the current code is perfectly fine as-is
(i.e. as you said, addr and port are both exported at the same time
so it will never happen that addr is non NULL and port is NULL),
somebody who is not so careful can break that arrangement in the
future, and that double-checking that port is not NULL before using
it is a prudent way to future-proof this part of the code.

Thanks.

^ permalink raw reply

* [PATCH 0/3] pack-objects: support bitmaps and delta-islands with `--path-walk`
From: Taylor Blau @ 2026-05-27 23:18 UTC (permalink / raw)
  To: git; +Cc: Derrick Stolee, Junio C Hamano, Jeff King, Elijah Newren

Note to the maintainer:

 * This series is based on 'ds/path-walk-filters' with Patrick's
   'ps/clang-w-glibc-2.43-and-_Generic' merged in. The former has since
   graduated. These are the three remaining patches from my earlier RFC
   after Stolee's series incorporated the filter-related pieces.

Here is a trimmed-down reroll of my series to make `--path-walk` work
with reachability bitmaps and delta-islands. This series was originally
an RFC that was a companion to Stolee's recent patches to extend
`--filter` support to `--path-walk` [1].

Since the previous round, Stolee's series has graduated and incorporated
the filter-related patches from my earlier RFC [2]. What remains are the
three patches here that implement support for reachability bitmaps and
delta-islands under `--path-walk`.

 * The first patch allows `--path-walk` to use reachability bitmaps when
   they can answer the request, falling back to path-walk enumeration
   when they cannot. It also lets bitmap writing see the same commit
   candidates that the regular traversal would have shown to the bitmap
   selector.

 * The second patch is preparatory, and factors the
   delta-islands-specific tree-depth recording from `show_object()` into
   a helper.

 * The final patch teaches the path-walk callback to perform the same
   delta-islands side effects as the regular traversal: propagating
   island marks for commits, and recording tree depths for trees. This
   gives `resolve_tree_islands()` the same input in either enumeration
   mode, so the existing island checks can be reused unchanged.

Thanks in advance for your review!

[1]: https://lore.kernel.org/git/pull.2101.git.1777731354.gitgitgadget@gmail.com/
[2]: https://lore.kernel.org/git/cover.1777853408.git.me@ttaylorr.com/

Taylor Blau (3):
  pack-objects: support reachability bitmaps with `--path-walk`
  pack-objects: extract `record_tree_depth()` helper
  pack-objects: support `--delta-islands` with `--path-walk`

 Documentation/git-pack-objects.adoc | 12 ++---
 builtin/pack-objects.c              | 68 +++++++++++++++++++++--------
 t/t5310-pack-bitmaps.sh             | 36 +++++++++++++++
 t/t5320-delta-islands.sh            | 29 ++++++++++++
 4 files changed, 122 insertions(+), 23 deletions(-)


base-commit: 45a9ecee26839cc880fdd5e704339dd3cf4ffc26
-- 
2.54.0.22.ga642305e3c9

^ permalink raw reply

* [PATCH 1/3] pack-objects: support reachability bitmaps with `--path-walk`
From: Taylor Blau @ 2026-05-27 23:18 UTC (permalink / raw)
  To: git; +Cc: Derrick Stolee, Junio C Hamano, Jeff King, Elijah Newren
In-Reply-To: <cover.1779923907.git.me@ttaylorr.com>

When 'pack-objects' is invoked with '--path-walk', it prevents us from
using reachability bitmaps.

This behavior dates back to 70664d2865c (pack-objects: add --path-walk
option, 2025-05-16), which included a comment in the relevant portion of
the command-line arguments handling that read as follows:

    /*
     * We must disable the bitmaps because we are removing
     * the --objects / --objects-edge[-aggressive] options.
     */

In fb2c309b7d3 (pack-objects: pass --objects with --path-walk,
2026-05-02), path-walk learned to pass '--objects' again, but still
kept bitmap traversal disabled. That leaves two useful cases
unsupported:

 * A path-walk repack that writes bitmaps does not give the bitmap
   selector any commits, because path-walk reveals commits through
   `add_objects_by_path()` rather than through `show_commit()`, where
   `index_commit_for_bitmap()` is normally called.

 * An invocation like "git pack-objects --use-bitmap-index --path-walk"
   never tries an existing bitmap, even when one is available and could
   answer the request.

Fortunately for us, neither restriction is required.

 * On the writing side: teach the path-walk object callback to call
   `index_commit_for_bitmap()` for commits that it adds to the pack.
   That gives the bitmap selector the commit candidates it would have
   seen from the regular traversal.

 * For bitmap reading, keep passing '--objects' to the internal rev_list
   machinery, but stop clearing `use_bitmap_index`. If an existing
   bitmap can answer the request, use it; otherwise fall back to
   path-walk's own enumeration.

There is one wrinkle when it comes to '--boundary', which we must not
pass into the bitmap walk in the presence of both '--path-walk' and
'--use-bitmap-index'. Path-walk needs boundary commits when it performs
its own traversal, in order to discover bases for thin packs, but the
bitmap traversal expects the usual non-boundary state. Work around this
by setting `revs->boundary` as late as possible within
`get_object_list_path_walk()`, after any bitmap attempt has either
succeeded or declined to answer the request.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 Documentation/git-pack-objects.adoc |  6 +++--
 builtin/pack-objects.c              | 18 +++++++++++++--
 t/t5310-pack-bitmaps.sh             | 36 +++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/Documentation/git-pack-objects.adoc b/Documentation/git-pack-objects.adoc
index 8a27aa19fd3..0adce8961a3 100644
--- a/Documentation/git-pack-objects.adoc
+++ b/Documentation/git-pack-objects.adoc
@@ -402,8 +402,10 @@ will be automatically changed to version `1`.
 	of filenames that cause collisions in Git's default name-hash
 	algorithm.
 +
-Incompatible with `--delta-islands`. The `--use-bitmap-index` option is
-ignored in the presence of `--path-walk`. The `--path-walk` option
+Incompatible with `--delta-islands`. When `--use-bitmap-index` is
+specified with `--path-walk`, a successful bitmap traversal is used for
+object enumeration, with path-walk remaining as the fallback traversal
+when the bitmap cannot satisfy the request. The `--path-walk` option
 supports the `--filter=<spec>` forms `blob:none`, `blob:limit=<n>`,
 `tree:0`, `object:type=<type>`, and `sparse:<oid>`. These supported filter
 types can be combined with the `combine:<spec>+<spec>` form.
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index b783dc62bc9..e4dcb563b7d 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4732,6 +4732,15 @@ static int add_objects_by_path(const char *path,
 			continue;
 
 		add_object_entry(oid, type, path, exclude);
+
+		if (type == OBJ_COMMIT && write_bitmap_index) {
+			struct commit *commit;
+
+			commit = lookup_commit(the_repository, oid);
+			if (!commit)
+				die(_("could not find commit %s"), oid_to_hex(oid));
+			index_commit_for_bitmap(commit);
+		}
 	}
 
 	oe_end = to_pack.nr_objects;
@@ -4764,6 +4773,13 @@ static int get_object_list_path_walk(struct rev_info *revs)
 	info.path_fn = add_objects_by_path;
 	info.path_fn_data = &processed;
 
+	/*
+	 * Path-walk needs boundary commits to discover thin-pack bases, but
+	 * bitmap traversal does not understand the boundary state. Set it
+	 * here so any prior bitmap attempt sees the usual non-boundary walk.
+	 */
+	revs->boundary = 1;
+
 	/*
 	 * Allow the --[no-]sparse option to be interesting here, if only
 	 * for testing purposes. Paths with no interesting objects will not
@@ -5195,9 +5211,7 @@ int cmd_pack_objects(int argc,
 		}
 	}
 	if (path_walk) {
-		strvec_push(&rp, "--boundary");
 		strvec_push(&rp, "--objects");
-		use_bitmap_index = 0;
 	} else if (thin) {
 		use_internal_rev_list = 1;
 		strvec_push(&rp, shallow
diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
index f693cb56691..69c5da1580a 100755
--- a/t/t5310-pack-bitmaps.sh
+++ b/t/t5310-pack-bitmaps.sh
@@ -577,6 +577,42 @@ test_bitmap_cases
 
 sane_unset GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL
 
+test_expect_success 'path-walk repack can write and use bitmap indexes' '
+	test_when_finished "rm -rf path-walk-bitmap" &&
+	git init path-walk-bitmap &&
+	(
+		cd path-walk-bitmap &&
+		test_commit first &&
+		test_commit second &&
+		test_commit third &&
+
+		git repack -a -d -b --path-walk &&
+		git rev-list --test-bitmap --use-bitmap-index HEAD &&
+
+		git rev-parse HEAD >in &&
+
+		git rev-list --objects --no-object-names HEAD >expect.raw &&
+		sort expect.raw >expect &&
+
+		for reuse in true false
+		do
+			: >trace.txt &&
+
+			GIT_TRACE2_EVENT="$(pwd)/trace.txt" \
+			git -c pack.allowPackReuse=$reuse pack-objects \
+				--stdout --revs --path-walk --use-bitmap-index \
+				<in >out.pack &&
+			grep "\"category\":\"bitmap\",\"key\":\"bitmap/hits\"" trace.txt &&
+
+			git index-pack out.pack &&
+
+			list_packed_objects out.idx >actual.raw &&
+			sort actual.raw >actual &&
+			test_cmp expect actual || return 1
+		done
+	)
+'
+
 test_expect_success 'incremental repack fails when bitmaps are requested' '
 	test_commit more-1 &&
 	test_must_fail git repack -d 2>err &&
-- 
2.54.0.22.ga642305e3c9


^ permalink raw reply related

* [PATCH 2/3] pack-objects: extract `record_tree_depth()` helper
From: Taylor Blau @ 2026-05-27 23:18 UTC (permalink / raw)
  To: git; +Cc: Derrick Stolee, Junio C Hamano, Jeff King, Elijah Newren
In-Reply-To: <cover.1779923907.git.me@ttaylorr.com>

Prepare for a subsequent change that needs to record tree depths from a
second call site by factoring the delta-islands tree-depth bookkeeping
out of `show_object()` and into a helper, `record_tree_depth()`.

The helper looks up the object in `to_pack`, returns early when the
object was not added there, computes the depth from the slash count in
the supplied name, and preserves the existing max-depth-wins behavior
when a tree is reached by more than one path.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 builtin/pack-objects.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index e4dcb563b7d..ec02e2b21d2 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -2722,6 +2722,22 @@ static inline void oe_set_tree_depth(struct packing_data *pack,
 	pack->tree_depth[e - pack->objects] = tree_depth;
 }
 
+static void record_tree_depth(const struct object_id *oid, const char *name)
+{
+	const char *p;
+	unsigned depth;
+	struct object_entry *ent;
+
+	/* the empty string is a root tree, which is depth 0 */
+	depth = *name ? 1 : 0;
+	for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
+		depth++;
+
+	ent = packlist_find(&to_pack, oid);
+	if (ent && depth > oe_tree_depth(&to_pack, ent))
+		oe_set_tree_depth(&to_pack, ent, depth);
+}
+
 /*
  * Return the size of the object without doing any delta
  * reconstruction (so non-deltas are true object sizes, but deltas
@@ -4375,20 +4391,8 @@ static void show_object(struct object *obj, const char *name,
 	add_preferred_base_object(name);
 	add_object_entry(&obj->oid, obj->type, name, 0);
 
-	if (use_delta_islands) {
-		const char *p;
-		unsigned depth;
-		struct object_entry *ent;
-
-		/* the empty string is a root tree, which is depth 0 */
-		depth = *name ? 1 : 0;
-		for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
-			depth++;
-
-		ent = packlist_find(&to_pack, &obj->oid);
-		if (ent && depth > oe_tree_depth(&to_pack, ent))
-			oe_set_tree_depth(&to_pack, ent, depth);
-	}
+	if (use_delta_islands)
+		record_tree_depth(&obj->oid, name);
 }
 
 static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
-- 
2.54.0.22.ga642305e3c9


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox