* [PATCH 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()`
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
@ 2026-05-19 16:12 ` Taylor Blau
2026-05-27 8:57 ` Jeff King
2026-05-19 16:12 ` [PATCH 2/8] pack-bitmap: check subtree bits before recursing Taylor Blau
` (8 subsequent siblings)
9 siblings, 1 reply; 34+ messages in thread
From: Taylor Blau @ 2026-05-19 16:12 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In the following commit, callers of `fill_bitmap_tree()` will be
required to check the bit corresponding to their tree before calling
that function. That change will reduce the overhead of setting up and
tearing down stack frames for trees whose bits are already set.
To prepare for that change, have callers pass in the tree's bit position
in `fill_bitmap_tree()`, which will make the next commit easier to read.
In the meantime, this change has a surprising and measurable benefit
during bitmap generation, particularly on very large repositories.
When processing sub-trees within `fill_bitmap_tree()`, the preimage of
this patch did the following:
while (tree_entry(&desc, entry)) {
switch (object_type(entry.mode)) {
case OBJ_TREE:
if (fill_bitmap_tree(writer, bitmap,
lookup_tree(writer->repo,
&entry.oid)) < 0) {
/* ... */
}
/* ... */
}
}
, first performing the object lookup via `lookup_tree()`, and then
locating its bit position within the recursive call. This patch
effectively reorders those two calls so that we first discover the
sub-tree's bit position, *then* load its tree.
By reordering these two operations, we spend fewer CPU cycles per
instruction, likely due to improved CPU dependency/cache/pipeline
behavior. Comparing the results of: running `perf stat` before and after
this commit, we have:
+--------------+-------------+-------------+-------------------+
| | HEAD^ | HEAD | Delta |
+--------------+-------------+-------------+-------------------+
| elapsed | 612.5 s | 582.4 s | -30.1 s (-4.9%) |
| cycles | 2,857.3 B | 2,713.3 B | -144.0 B (-5.0%) |
| instructions | 2,413.2 B | 2,415.5 B | +2.3 B (+0.1%) |
| CPI | 1.184 | 1.123 | -0.061 (-5.1%) |
+--------------+-------------+-------------+-------------------+
In a large repository with ~4.8M commit, and ~37.1M tree objects this
change improves timing from ~612.5 seconds down to ~582.4 seconds, or a
~4.9% improvement. More importantly, the number of CPU cycles spent
dropped off significantly as a result of this commit, lowering our
cycles-per-instruction ratio by about ~5.1%.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 1c8070f99c0..2d5ff8fd406 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -456,10 +456,10 @@ static void bitmap_builder_clear(struct bitmap_builder *bb)
static int fill_bitmap_tree(struct bitmap_writer *writer,
struct bitmap *bitmap,
- struct tree *tree)
+ struct tree *tree,
+ uint32_t pos)
{
int found;
- uint32_t pos;
struct tree_desc desc;
struct name_entry entry;
@@ -467,9 +467,6 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
* If our bit is already set, then there is nothing to do. Both this
* tree and all of its children will be set.
*/
- pos = find_object_pos(writer, &tree->object.oid, &found);
- if (!found)
- return -1;
if (bitmap_get(bitmap, pos))
return 0;
bitmap_set(bitmap, pos);
@@ -482,8 +479,12 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
while (tree_entry(&desc, &entry)) {
switch (object_type(entry.mode)) {
case OBJ_TREE:
+ pos = find_object_pos(writer, &entry.oid, &found);
+ if (!found)
+ return -1;
if (fill_bitmap_tree(writer, bitmap,
- lookup_tree(writer->repo, &entry.oid)) < 0)
+ lookup_tree(writer->repo,
+ &entry.oid), pos) < 0)
return -1;
break;
case OBJ_BLOB:
@@ -575,8 +576,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
}
while (tree_queue->nr) {
- if (fill_bitmap_tree(writer, ent->bitmap,
- prio_queue_get(tree_queue)) < 0)
+ struct tree *t = prio_queue_get(tree_queue);
+ int found;
+
+ pos = find_object_pos(writer, &t->object.oid, &found);
+ if (!found)
+ return -1;
+
+ if (fill_bitmap_tree(writer, ent->bitmap, t, pos) < 0)
return -1;
}
return 0;
--
2.54.0.rc1.84.g30ce254312c
^ permalink raw reply related [flat|nested] 34+ messages in thread* Re: [PATCH 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()`
2026-05-19 16:12 ` [PATCH 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()` Taylor Blau
@ 2026-05-27 8:57 ` Jeff King
2026-05-27 14:36 ` Taylor Blau
0 siblings, 1 reply; 34+ messages in thread
From: Jeff King @ 2026-05-27 8:57 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Tue, May 19, 2026 at 12:12:36PM -0400, Taylor Blau wrote:
> In the following commit, callers of `fill_bitmap_tree()` will be
> required to check the bit corresponding to their tree before calling
> that function. That change will reduce the overhead of setting up and
> tearing down stack frames for trees whose bits are already set.
>
> To prepare for that change, have callers pass in the tree's bit position
> in `fill_bitmap_tree()`, which will make the next commit easier to read.
>
> In the meantime, this change has a surprising and measurable benefit
> during bitmap generation, particularly on very large repositories.
It is indeed surprising. There's a possible candidate for the speedup
here:
> @@ -482,8 +479,12 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
> while (tree_entry(&desc, &entry)) {
> switch (object_type(entry.mode)) {
> case OBJ_TREE:
> + pos = find_object_pos(writer, &entry.oid, &found);
> + if (!found)
> + return -1;
> if (fill_bitmap_tree(writer, bitmap,
> - lookup_tree(writer->repo, &entry.oid)) < 0)
> + lookup_tree(writer->repo,
> + &entry.oid), pos) < 0)
> return -1;
> break;
Whenever "found" is false, we cut out early and skip the hash lookup in
lookup_tree() entirely. But that should almost never happen! It implies
that a reachable object is not in the pack/midx, and thus the bitmaps is
not closed (and we'll refuse to generate it).
So it really is the case that we do the same operations in a different
order. Weird.
But the patch itself looks correct to me, and I get ~6% speedup on a
from-scratch bitmap generation of linux.git. I guess it could vary
between architectures and compilers (I'm using gcc on x86), but since
the reorg is setting us up for further optimizations in the next patch,
I suppose there's no need to look a gift horse in the mouth.
-Peff
^ permalink raw reply [flat|nested] 34+ messages in thread* Re: [PATCH 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()`
2026-05-27 8:57 ` Jeff King
@ 2026-05-27 14:36 ` Taylor Blau
0 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 14:36 UTC (permalink / raw)
To: Jeff King; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Wed, May 27, 2026 at 04:57:40AM -0400, Jeff King wrote:
> It is indeed surprising. There's a possible candidate for the speedup
> here:
>
> > @@ -482,8 +479,12 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
> > while (tree_entry(&desc, &entry)) {
> > switch (object_type(entry.mode)) {
> > case OBJ_TREE:
> > + pos = find_object_pos(writer, &entry.oid, &found);
> > + if (!found)
> > + return -1;
> > if (fill_bitmap_tree(writer, bitmap,
> > - lookup_tree(writer->repo, &entry.oid)) < 0)
> > + lookup_tree(writer->repo,
> > + &entry.oid), pos) < 0)
> > return -1;
> > break;
>
> Whenever "found" is false, we cut out early and skip the hash lookup in
> lookup_tree() entirely. But that should almost never happen! It implies
> that a reachable object is not in the pack/midx, and thus the bitmaps is
> not closed (and we'll refuse to generate it).
That's right, and I had actually written something like the following
while developing this patch:
--- 8< ---
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 2d5ff8fd406..328e1c13df3 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -481,7 +481,7 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
case OBJ_TREE:
pos = find_object_pos(writer, &entry.oid, &found);
if (!found)
- return -1;
+ BUG("huh??");
if (fill_bitmap_tree(writer, bitmap,
lookup_tree(writer->repo,
&entry.oid), pos) < 0)
--- >8 ---
, but couldn't trigger it in either the test suite nor in my sample
repository. I left it in there as a sanity measure.
> So it really is the case that we do the same operations in a different
> order. Weird.
Yeah, I puzzled over this for quite a while myself. I really think that
this is reordering produces more favorable cache behavior or codegen
that results in a meaningful speedup.
> But the patch itself looks correct to me, and I get ~6% speedup on a
> from-scratch bitmap generation of linux.git. I guess it could vary
> between architectures and compilers (I'm using gcc on x86), but since
> the reorg is setting us up for further optimizations in the next patch,
> I suppose there's no need to look a gift horse in the mouth.
Good, I'm glad that it was reproducible on your machine. And I agree
;-).
Thanks,
Taylor
^ permalink raw reply related [flat|nested] 34+ messages in thread
* [PATCH 2/8] pack-bitmap: check subtree bits before recursing
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
2026-05-19 16:12 ` [PATCH 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()` Taylor Blau
@ 2026-05-19 16:12 ` Taylor Blau
2026-05-27 9:03 ` Jeff King
2026-05-19 16:12 ` [PATCH 3/8] pack-bitmap: reuse stored selected bitmaps Taylor Blau
` (7 subsequent siblings)
9 siblings, 1 reply; 34+ messages in thread
From: Taylor Blau @ 2026-05-19 16:12 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In the previous commit, we adjusted the callers of `fill_bitmap_tree()`
to pass in the bit position of the tree they wish to fill.
This commit makes use of that information at the call site to avoid
setting up a stack frame for fill_bitmap_tree() entirely whenever a
tree's bit position is already set.
Since this is such a hot path, the avoided cost of setting up and
tearing down stack frames for each noop'd call to `fill_bitmap_tree()`
is significant:
+--------------+-------------+-------------+-------------------+
| | HEAD^ | HEAD | Delta |
+--------------+-------------+-------------+-------------------+
| elapsed | 582.4 s | 562.8 s | -19.6 s (-3.4%) |
| cycles | 2,713.3 B | 2,621.3 B | -92.0 B (-3.4%) |
| instructions | 2,415.5 B | 2,348.9 B | -66.6 B (-2.8%) |
| CPI | 1.123 | 1.116 | -0.007 (-0.7%) |
+--------------+-------------+-------------+-------------------+
In the same repository as in the previous commit, our timings dropped
from ~582.4 seconds down to ~562.77 seconds.
While the cycles-per-instruction ratio is basically unchanged, we
execute significantly fewer instructions, and correspondingly fewer
cycles.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 23 +++++++++++++++++------
1 file changed, 17 insertions(+), 6 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 2d5ff8fd406..72610397020 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -463,12 +463,6 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
struct tree_desc desc;
struct name_entry entry;
- /*
- * If our bit is already set, then there is nothing to do. Both this
- * tree and all of its children will be set.
- */
- if (bitmap_get(bitmap, pos))
- return 0;
bitmap_set(bitmap, pos);
if (repo_parse_tree(writer->repo, tree) < 0)
@@ -482,6 +476,15 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
pos = find_object_pos(writer, &entry.oid, &found);
if (!found)
return -1;
+ if (bitmap_get(bitmap, pos)) {
+ /*
+ * If our bit is already set, then there
+ * is nothing to do. Both this tree and
+ * all of its children will be set.
+ */
+ break;
+ }
+
if (fill_bitmap_tree(writer, bitmap,
lookup_tree(writer->repo,
&entry.oid), pos) < 0)
@@ -582,6 +585,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
pos = find_object_pos(writer, &t->object.oid, &found);
if (!found)
return -1;
+ if (bitmap_get(ent->bitmap, pos)) {
+ /*
+ * If our bit is already set, then there is
+ * nothing to do. Both this tree and all of its
+ * children will be set.
+ */
+ continue;
+ }
if (fill_bitmap_tree(writer, ent->bitmap, t, pos) < 0)
return -1;
--
2.54.0.rc1.84.g30ce254312c
^ permalink raw reply related [flat|nested] 34+ messages in thread* Re: [PATCH 2/8] pack-bitmap: check subtree bits before recursing
2026-05-19 16:12 ` [PATCH 2/8] pack-bitmap: check subtree bits before recursing Taylor Blau
@ 2026-05-27 9:03 ` Jeff King
2026-05-27 14:36 ` Taylor Blau
0 siblings, 1 reply; 34+ messages in thread
From: Jeff King @ 2026-05-27 9:03 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Tue, May 19, 2026 at 12:12:39PM -0400, Taylor Blau wrote:
> In the previous commit, we adjusted the callers of `fill_bitmap_tree()`
> to pass in the bit position of the tree they wish to fill.
>
> This commit makes use of that information at the call site to avoid
> setting up a stack frame for fill_bitmap_tree() entirely whenever a
> tree's bit position is already set.
OK, this one at least has a plausible explanation. ;)
I can reproduce your speedup on linux.git (~5% again). I don't love that
we have to duplicate the logic in each of the callers, but there are
only two sites (and unlikely to ever be more). And it is only one line,
the comment notwithstanding. That seems like a good tradeoff for a
multiple-second speedup.
The patch itself looks obviously correct.
-Peff
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [PATCH 2/8] pack-bitmap: check subtree bits before recursing
2026-05-27 9:03 ` Jeff King
@ 2026-05-27 14:36 ` Taylor Blau
0 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 14:36 UTC (permalink / raw)
To: Jeff King; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Wed, May 27, 2026 at 05:03:48AM -0400, Jeff King wrote:
> On Tue, May 19, 2026 at 12:12:39PM -0400, Taylor Blau wrote:
>
> > In the previous commit, we adjusted the callers of `fill_bitmap_tree()`
> > to pass in the bit position of the tree they wish to fill.
> >
> > This commit makes use of that information at the call site to avoid
> > setting up a stack frame for fill_bitmap_tree() entirely whenever a
> > tree's bit position is already set.
>
> OK, this one at least has a plausible explanation. ;)
>
> I can reproduce your speedup on linux.git (~5% again). I don't love that
> we have to duplicate the logic in each of the callers, but there are
> only two sites (and unlikely to ever be more). And it is only one line,
> the comment notwithstanding. That seems like a good tradeoff for a
> multiple-second speedup.
Yup, exactly. There are naturally only two callers: one to handle a
commit's root tree, and another for recursive calls to handle subtrees.
As a result, I'm OK with the duplication here.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 34+ messages in thread
* [PATCH 3/8] pack-bitmap: reuse stored selected bitmaps
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
2026-05-19 16:12 ` [PATCH 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()` Taylor Blau
2026-05-19 16:12 ` [PATCH 2/8] pack-bitmap: check subtree bits before recursing Taylor Blau
@ 2026-05-19 16:12 ` Taylor Blau
2026-05-27 9:24 ` Jeff King
2026-05-19 16:12 ` [PATCH 4/8] pack-bitmap: consolidate `find_object_pos()` success path Taylor Blau
` (6 subsequent siblings)
9 siblings, 1 reply; 34+ messages in thread
From: Taylor Blau @ 2026-05-19 16:12 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
When `fill_bitmap_commit()` reaches an ancestor that was selected for
its own bitmap and processed earlier, its object closure is already
stored in `writer->bitmaps` as an EWAH bitmap. As a result, walking
through that commit's tree and parents again is redundant.
Teach `fill_bitmap_commit()` to notice that case. For non-root commits in
the walk, look for a stored selected bitmap and OR it into the bitmap
being built. If one exists, skip the commit, its tree, and its parents.
Building bitmaps from scratch on the same test repository from the
previous commits yields a significant speed-up:
+------------------+-------------+-------------+---------------------+
| | HEAD^ | HEAD | Delta |
+------------------+-------------+-------------+---------------------+
| elapsed | 562.8 s | 324.8 s | -237.9 s (-42.3%) |
| cycles | 2,621.3 B | 1,508.6 B | -1,112.7 B (-42.4%) |
| instructions | 2,348.9 B | 1,436.6 B | -912.3 B (-38.8%) |
| CPI | 1.116 | 1.050 | -0.066 (-5.9%) |
+------------------+-------------+-------------+---------------------+
In our testing repository, there are 1,261 commits selected for bitmap
coverage, and 1,382 maximal commits induced as a result of that. Of the
1,382 calls made to `fill_bitmap_commit()` (one per maximal commit), 131
of them can be short-circuited at some point during their traversal as a
consequence of this change.
In large repositories where the cost of filling the bitmap for any
individual commit is large, being able to short-circuit even ~9.5% of
the calls to `fill_bitmap_commit()` results in a significant savings.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 72610397020..651ad467469 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -509,6 +509,9 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
static int reused_bitmaps_nr;
static int reused_pseudo_merge_bitmaps_nr;
+static int fill_bitmap_commit_calls_nr;
+static int fill_bitmap_commit_found_ancestor_nr;
+
static int fill_bitmap_commit(struct bitmap_writer *writer,
struct bb_commit *ent,
struct commit *commit,
@@ -519,6 +522,9 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
{
int found;
uint32_t pos;
+
+ fill_bitmap_commit_calls_nr++;
+
if (!ent->bitmap)
ent->bitmap = bitmap_new();
@@ -553,6 +559,28 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
bitmap_free(remapped);
}
+ /*
+ * If we encounter an ancestor for which we have already
+ * computed a bitmap during this build (i.e. a regular
+ * selected commit processed earlier in topo order), we can
+ * short-circuit the walk: its stored bitmap already covers
+ * the commit itself, its tree, and all of its ancestors.
+ */
+ if (c != commit) {
+ khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
+ c->object.oid);
+ if (hash_pos != kh_end(writer->bitmaps)) {
+ struct bitmapped_commit *stored =
+ kh_value(writer->bitmaps, hash_pos);
+ if (stored && stored->bitmap) {
+ fill_bitmap_commit_found_ancestor_nr++;
+ bitmap_or_ewah(ent->bitmap,
+ stored->bitmap);
+ continue;
+ }
+ }
+ }
+
/*
* Mark ourselves and queue our tree. The commit
* walk ensures we cover all parents.
@@ -692,6 +720,12 @@ int bitmap_writer_build(struct bitmap_writer *writer)
trace2_data_intmax("pack-bitmap-write", writer->repo,
"building_bitmaps_pseudo_merge_reused",
reused_pseudo_merge_bitmaps_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "fill_bitmap_commit_calls_nr",
+ fill_bitmap_commit_calls_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "fill_bitmap_commit_found_ancestor_nr",
+ fill_bitmap_commit_found_ancestor_nr);
stop_progress(&writer->progress);
--
2.54.0.rc1.84.g30ce254312c
^ permalink raw reply related [flat|nested] 34+ messages in thread* Re: [PATCH 3/8] pack-bitmap: reuse stored selected bitmaps
2026-05-19 16:12 ` [PATCH 3/8] pack-bitmap: reuse stored selected bitmaps Taylor Blau
@ 2026-05-27 9:24 ` Jeff King
2026-05-27 14:40 ` Taylor Blau
0 siblings, 1 reply; 34+ messages in thread
From: Jeff King @ 2026-05-27 9:24 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Tue, May 19, 2026 at 12:12:41PM -0400, Taylor Blau wrote:
> Building bitmaps from scratch on the same test repository from the
> previous commits yields a significant speed-up:
>
> +------------------+-------------+-------------+---------------------+
> | | HEAD^ | HEAD | Delta |
> +------------------+-------------+-------------+---------------------+
> | elapsed | 562.8 s | 324.8 s | -237.9 s (-42.3%) |
> | cycles | 2,621.3 B | 1,508.6 B | -1,112.7 B (-42.4%) |
> | instructions | 2,348.9 B | 1,436.6 B | -912.3 B (-38.8%) |
> | CPI | 1.116 | 1.050 | -0.066 (-5.9%) |
> +------------------+-------------+-------------+---------------------+
Oh my, that's a rather nice speedup. I can reproduce here on linux.git
(~47% improvement).
> When `fill_bitmap_commit()` reaches an ancestor that was selected for
> its own bitmap and processed earlier, its object closure is already
> stored in `writer->bitmaps` as an EWAH bitmap. As a result, walking
> through that commit's tree and parents again is redundant.
>
> Teach `fill_bitmap_commit()` to notice that case. For non-root commits in
> the walk, look for a stored selected bitmap and OR it into the bitmap
> being built. If one exists, skip the commit, its tree, and its parents.
I feel like this _shouldn't_ be necessary, because the idea of the
current writing code is to go from the roots up, following inverted
parent pointers, and passing the bitmap up as we go. So whenever we
visit a commit we should in theory have all of the ancestor's bits set
in that bitmap. But I remember that the simple-and-stupid approach ended
up being too memory hungry, so we pick some focal points in the graph
and then fill them independently.
And I guess that's what you're showing here:
> In our testing repository, there are 1,261 commits selected for bitmap
> coverage, and 1,382 maximal commits induced as a result of that. Of the
> 1,382 calls made to `fill_bitmap_commit()` (one per maximal commit), 131
> of them can be short-circuited at some point during their traversal as a
> consequence of this change.
We'll end up seeing some of the same parts of history for various
maximal commits, and this lets us sometimes reuse the earlier efforts.
Anyway, it is hard to argue with the numbers.
> @@ -553,6 +559,28 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
> bitmap_free(remapped);
> }
>
> + /*
> + * If we encounter an ancestor for which we have already
> + * computed a bitmap during this build (i.e. a regular
> + * selected commit processed earlier in topo order), we can
> + * short-circuit the walk: its stored bitmap already covers
> + * the commit itself, its tree, and all of its ancestors.
> + */
> + if (c != commit) {
> + khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
> + c->object.oid);
> + if (hash_pos != kh_end(writer->bitmaps)) {
> + struct bitmapped_commit *stored =
> + kh_value(writer->bitmaps, hash_pos);
> + if (stored && stored->bitmap) {
> + fill_bitmap_commit_found_ancestor_nr++;
> + bitmap_or_ewah(ent->bitmap,
> + stored->bitmap);
> + continue;
> + }
> + }
> + }
OK, so we incur one hash lookup per commit as we walk, which seems like
a good tradeoff.
I wondered about "c != commit" here. "c" is the commit we're traversing,
and "commit" is the one for which we're trying to build the bitmap. So
we would not expect to ever have an entry in writer->bitmaps for "c"
yet, but the conditional is just short-circuiting the hash lookup.
The rest of the patch looks obviously correct. The trace2 bits aren't
strictly necessary, of course, but some metrics might help with further
tuning.
-Peff
^ permalink raw reply [flat|nested] 34+ messages in thread* Re: [PATCH 3/8] pack-bitmap: reuse stored selected bitmaps
2026-05-27 9:24 ` Jeff King
@ 2026-05-27 14:40 ` Taylor Blau
0 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 14:40 UTC (permalink / raw)
To: Jeff King; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Wed, May 27, 2026 at 05:24:12AM -0400, Jeff King wrote:
> On Tue, May 19, 2026 at 12:12:41PM -0400, Taylor Blau wrote:
>
> > Building bitmaps from scratch on the same test repository from the
> > previous commits yields a significant speed-up:
> >
> > +------------------+-------------+-------------+---------------------+
> > | | HEAD^ | HEAD | Delta |
> > +------------------+-------------+-------------+---------------------+
> > | elapsed | 562.8 s | 324.8 s | -237.9 s (-42.3%) |
> > | cycles | 2,621.3 B | 1,508.6 B | -1,112.7 B (-42.4%) |
> > | instructions | 2,348.9 B | 1,436.6 B | -912.3 B (-38.8%) |
> > | CPI | 1.116 | 1.050 | -0.066 (-5.9%) |
> > +------------------+-------------+-------------+---------------------+
>
> Oh my, that's a rather nice speedup. I can reproduce here on linux.git
> (~47% improvement).
>
> > When `fill_bitmap_commit()` reaches an ancestor that was selected for
> > its own bitmap and processed earlier, its object closure is already
> > stored in `writer->bitmaps` as an EWAH bitmap. As a result, walking
> > through that commit's tree and parents again is redundant.
> >
> > Teach `fill_bitmap_commit()` to notice that case. For non-root commits in
> > the walk, look for a stored selected bitmap and OR it into the bitmap
> > being built. If one exists, skip the commit, its tree, and its parents.
>
> I feel like this _shouldn't_ be necessary, because the idea of the
> current writing code is to go from the roots up, following inverted
> parent pointers, and passing the bitmap up as we go. So whenever we
> visit a commit we should in theory have all of the ancestor's bits set
> in that bitmap. But I remember that the simple-and-stupid approach ended
> up being too memory hungry, so we pick some focal points in the graph
> and then fill them independently.
It's sharing within the non-first parent history that is killing us
here. I think what you said is true in a completely linear repository
with no merges. But since we only pass commit masks from commits to
their first parents, we don't reuse any already-generated bitmaps for
common points in history not shared between commits' first parents.
> I wondered about "c != commit" here. "c" is the commit we're traversing,
> and "commit" is the one for which we're trying to build the bitmap. So
> we would not expect to ever have an entry in writer->bitmaps for "c"
> yet, but the conditional is just short-circuiting the hash lookup.
Exactly.
> The rest of the patch looks obviously correct. The trace2 bits aren't
> strictly necessary, of course, but some metrics might help with further
> tuning.
Yeah, these were for my own curiosity as much as anything. I had written
them as a temporary measure in order to write the "[...] there are 1,261
commits selected for bitmap coverage, and 1,382 maximal commits induced
[...]" portion of the commit message above.
Once I had written it, I found the result useful enough to keep around.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 34+ messages in thread
* [PATCH 4/8] pack-bitmap: consolidate `find_object_pos()` success path
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
` (2 preceding siblings ...)
2026-05-19 16:12 ` [PATCH 3/8] pack-bitmap: reuse stored selected bitmaps Taylor Blau
@ 2026-05-19 16:12 ` Taylor Blau
2026-05-20 14:42 ` SZEDER Gábor
2026-05-27 9:27 ` Jeff King
2026-05-19 16:12 ` [PATCH 5/8] pack-bitmap: cache object positions during fill Taylor Blau
` (5 subsequent siblings)
9 siblings, 2 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-19 16:12 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
Both sides of `find_object_pos()` report success in the same way by
setting the optional `found` out-parameter and return the resolved
bitmap position.
Prepare for adding more bookkeeping around object-position lookups by
storing the result in a local `pos` variable and sharing the success
return path between the packlist and MIDX cases.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 651ad467469..6483fdc7daf 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -224,23 +224,22 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
if (writer->midx)
base_objects = writer->midx->num_objects +
writer->midx->num_objects_in_base;
-
- if (found)
- *found = 1;
- return oe_in_pack_pos(writer->to_pack, entry) + base_objects;
+ pos = oe_in_pack_pos(writer->to_pack, entry) + base_objects;
} else if (writer->midx) {
- uint32_t at, pos;
+ uint32_t at;
if (!bsearch_midx(oid, writer->midx, &at))
goto missing;
if (midx_to_pack_pos(writer->midx, at, &pos) < 0)
goto missing;
-
- if (found)
- *found = 1;
- return pos;
+ } else {
+ goto missing;
}
+ if (found)
+ *found = 1;
+ return pos;
+
missing:
if (found)
*found = 0;
--
2.54.0.rc1.84.g30ce254312c
^ permalink raw reply related [flat|nested] 34+ messages in thread* Re: [PATCH 4/8] pack-bitmap: consolidate `find_object_pos()` success path
2026-05-19 16:12 ` [PATCH 4/8] pack-bitmap: consolidate `find_object_pos()` success path Taylor Blau
@ 2026-05-20 14:42 ` SZEDER Gábor
2026-05-20 17:12 ` Taylor Blau
2026-05-27 9:27 ` Jeff King
1 sibling, 1 reply; 34+ messages in thread
From: SZEDER Gábor @ 2026-05-20 14:42 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
On Tue, May 19, 2026 at 12:12:44PM -0400, Taylor Blau wrote:
> Both sides of `find_object_pos()` report success in the same way by
> setting the optional `found` out-parameter and return the resolved
> bitmap position.
>
> Prepare for adding more bookkeeping around object-position lookups by
> storing the result in a local `pos` variable and sharing the success
This 'pos' variable will only be declared in the next commit,
resulting in an error building this commit:
pack-bitmap-write.c: In function ‘find_object_pos’:
pack-bitmap-write.c:227:17: error: ‘pos’ undeclared (first use in this function)
227 | pos = oe_in_pack_pos(writer->to_pack, entry) + base_objects;
| ^~~
pack-bitmap-write.c:227:17: note: each undeclared identifier is reported only once for each function it appears in
make: *** [Makefile:2917: pack-bitmap-write.o] Error 1
> return path between the packlist and MIDX cases.
>
> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
> pack-bitmap-write.c | 17 ++++++++---------
> 1 file changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
> index 651ad467469..6483fdc7daf 100644
> --- a/pack-bitmap-write.c
> +++ b/pack-bitmap-write.c
> @@ -224,23 +224,22 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
> if (writer->midx)
> base_objects = writer->midx->num_objects +
> writer->midx->num_objects_in_base;
> -
> - if (found)
> - *found = 1;
> - return oe_in_pack_pos(writer->to_pack, entry) + base_objects;
> + pos = oe_in_pack_pos(writer->to_pack, entry) + base_objects;
> } else if (writer->midx) {
> - uint32_t at, pos;
> + uint32_t at;
>
> if (!bsearch_midx(oid, writer->midx, &at))
> goto missing;
> if (midx_to_pack_pos(writer->midx, at, &pos) < 0)
> goto missing;
> -
> - if (found)
> - *found = 1;
> - return pos;
> + } else {
> + goto missing;
> }
>
> + if (found)
> + *found = 1;
> + return pos;
> +
> missing:
> if (found)
> *found = 0;
> --
> 2.54.0.rc1.84.g30ce254312c
>
^ permalink raw reply [flat|nested] 34+ messages in thread* Re: [PATCH 4/8] pack-bitmap: consolidate `find_object_pos()` success path
2026-05-20 14:42 ` SZEDER Gábor
@ 2026-05-20 17:12 ` Taylor Blau
0 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-20 17:12 UTC (permalink / raw)
To: SZEDER Gábor
Cc: git, Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
On Wed, May 20, 2026 at 04:42:37PM +0200, SZEDER Gábor wrote:
> On Tue, May 19, 2026 at 12:12:44PM -0400, Taylor Blau wrote:
> > Both sides of `find_object_pos()` report success in the same way by
> > setting the optional `found` out-parameter and return the resolved
> > bitmap position.
> >
> > Prepare for adding more bookkeeping around object-position lookups by
> > storing the result in a local `pos` variable and sharing the success
>
> This 'pos' variable will only be declared in the next commit,
> resulting in an error building this commit:
>
> pack-bitmap-write.c: In function ‘find_object_pos’:
> pack-bitmap-write.c:227:17: error: ‘pos’ undeclared (first use in this function)
> 227 | pos = oe_in_pack_pos(writer->to_pack, entry) + base_objects;
> | ^~~
> pack-bitmap-write.c:227:17: note: each undeclared identifier is reported only once for each function it appears in
> make: *** [Makefile:2917: pack-bitmap-write.o] Error 1
Thanks for spotting. I had split the patch that immediately follows this
one into two to make the latter easier to read, but have no idea how
this snuck through.
It's fixed by declaring `pos` in this commit:
--- 8< ---
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 6483fdc7daf..42ed22feacc 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -217,6 +217,7 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
const struct object_id *oid, int *found)
{
struct object_entry *entry;
+ uint32_t pos;
entry = packlist_find(writer->to_pack, oid);
if (entry) {
--- >8 ---
, but I'll send a re-roll after the rest of the series has been
reviewed.
Thanks,
Taylor
^ permalink raw reply related [flat|nested] 34+ messages in thread
* Re: [PATCH 4/8] pack-bitmap: consolidate `find_object_pos()` success path
2026-05-19 16:12 ` [PATCH 4/8] pack-bitmap: consolidate `find_object_pos()` success path Taylor Blau
2026-05-20 14:42 ` SZEDER Gábor
@ 2026-05-27 9:27 ` Jeff King
1 sibling, 0 replies; 34+ messages in thread
From: Jeff King @ 2026-05-27 9:27 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Tue, May 19, 2026 at 12:12:44PM -0400, Taylor Blau wrote:
> Both sides of `find_object_pos()` report success in the same way by
> setting the optional `found` out-parameter and return the resolved
> bitmap position.
>
> Prepare for adding more bookkeeping around object-position lookups by
> storing the result in a local `pos` variable and sharing the success
> return path between the packlist and MIDX cases.
OK. Modulo the missing "pos" fixup, this seems like an obviously correct
refactor. On its own its hard to judge if it makes things better, so
let's read on.
-Peff
^ permalink raw reply [flat|nested] 34+ messages in thread
* [PATCH 5/8] pack-bitmap: cache object positions during fill
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
` (3 preceding siblings ...)
2026-05-19 16:12 ` [PATCH 4/8] pack-bitmap: consolidate `find_object_pos()` success path Taylor Blau
@ 2026-05-19 16:12 ` Taylor Blau
2026-05-27 9:45 ` Jeff King
2026-05-19 16:12 ` [PATCH 6/8] pack-bitmap: sort bitmaps before XORing Taylor Blau
` (4 subsequent siblings)
9 siblings, 1 reply; 34+ messages in thread
From: Taylor Blau @ 2026-05-19 16:12 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
The previous commits removed some redundant work from bitmap generation
by avoiding unnecessary tree recursion and by reusing selected bitmaps
that have already been computed.
Even with those changes in place, there is still an extremely hot path
from `fill_bitmap_commit()` and `fill_bitmap_tree()` to translate object
IDs into their corresponding bit positions in order to generate their
bitmaps.
In a small repository, this overhead is not significant. However, in a
very large repository (e.g., the one that we have been using as a
benchmark over the past several commits with ~57M total objects), the
overhead of locating object bit positions (often repeatedly) adds up
significantly.
Combat this by adding a small, direct-mapped cache to the bitmap writer
which maps object IDs to their corresponding bit positions. Size the
cache according to the number of objects being written, with fixed lower
and upper bounds so small repositories do not pay for a large table and
large repositories can avoid most repeated packlist and MIDX lookups.
On my machine with (a somewhat outdated) GCC 15.2.0, each entry in the
cache is 40 bytes wide:
$ pahole -C bitmap_pos_cache_entry pack-bitmap-write.o
struct bitmap_pos_cache_entry {
struct object_id oid; /* 0 36 */
uint32_t pos; /* 36 4 */
/* size: 40, cachelines: 1, members: 2 */
/* last cacheline: 40 bytes */
};
, and we will allocate up to 2^21 entries for a maximum total of 80 MiB
of cache overhead.
In our example repository from above and in earlier commits, this
results in a ~9.4% reduction in runtime relative to the previous commit:
+------------------+-------------+-------------+---------------------+
| | HEAD^ | HEAD | Delta |
+------------------+-------------+-------------+---------------------+
| elapsed | 324.8 s | 294.1 s | -30.7 s (-9.4%) |
| cycles | 1,508.6 B | 1,365.5 B | -143.0 B (-9.5%) |
| instructions | 1,436.6 B | 1,389.8 B | -46.9 B (-3.3%) |
| CPI | 1.050 | 0.983 | -0.068 (-6.4%) |
+------------------+-------------+-------------+---------------------+
When generating bitmaps on this repository (to produce the above
timings), the cache grew to its maximum size of 80 MiB, and resulted in
1.024B cache hits and 59.957M cache misses.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 89 ++++++++++++++++++++++++++++++++++++++++++++-
pack-bitmap.h | 7 ++++
2 files changed, 95 insertions(+), 1 deletion(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 6483fdc7daf..4b6fb07edd7 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -89,6 +89,7 @@ void bitmap_writer_free(struct bitmap_writer *writer)
ewah_free(writer->tags);
kh_destroy_oid_map(writer->bitmaps);
+ free(writer->pos_cache);
kh_foreach_value(writer->pseudo_merge_commits, idx,
free_pseudo_merge_commit_idx(idx));
@@ -213,14 +214,92 @@ void bitmap_writer_push_commit(struct bitmap_writer *writer,
writer->selected_nr++;
}
+struct bitmap_pos_cache_entry {
+ struct object_id oid;
+ uint32_t pos;
+};
+
+#define BITMAP_POS_MIN_CACHE_SIZE (1U << 10)
+#define BITMAP_POS_MAX_CACHE_SIZE (1U << 21)
+#define BITMAP_POS_CACHE_VALID (1U << 31)
+
+static void bitmap_writer_init_pos_cache(struct bitmap_writer *writer)
+{
+ if (writer->pos_cache)
+ return;
+
+ writer->pos_cache_nr = BITMAP_POS_MIN_CACHE_SIZE;
+
+ while (writer->pos_cache_nr < writer->to_pack->nr_objects &&
+ writer->pos_cache_nr < BITMAP_POS_MAX_CACHE_SIZE)
+ writer->pos_cache_nr <<= 1;
+
+ CALLOC_ARRAY(writer->pos_cache, writer->pos_cache_nr);
+}
+
+static size_t bitmap_writer_pos_cache_slot(struct bitmap_writer *writer,
+ const struct object_id *oid)
+{
+ return oidhash(oid) & (writer->pos_cache_nr - 1);
+}
+
+static bool bitmap_writer_pos_cache_valid(struct bitmap_writer *writer,
+ size_t slot)
+{
+ return !!(writer->pos_cache[slot].pos & BITMAP_POS_CACHE_VALID);
+}
+
+static int find_cached_object_pos(struct bitmap_writer *writer,
+ const struct object_id *oid, uint32_t *pos)
+{
+ size_t slot = bitmap_writer_pos_cache_slot(writer, oid);
+
+ if (bitmap_writer_pos_cache_valid(writer, slot) &&
+ oideq(&writer->pos_cache[slot].oid, oid)) {
+ writer->pos_cache_hits++;
+ *pos = writer->pos_cache[slot].pos & ~BITMAP_POS_CACHE_VALID;
+ return 1;
+ }
+
+ writer->pos_cache_misses++;
+ return 0;
+}
+
+static uint32_t store_cached_object_pos(struct bitmap_writer *writer,
+ const struct object_id *oid,
+ uint32_t pos)
+{
+ size_t slot;
+
+ if (pos & BITMAP_POS_CACHE_VALID)
+ return pos; /* too large to cache */
+
+ slot = bitmap_writer_pos_cache_slot(writer, oid);
+
+ oidcpy(&writer->pos_cache[slot].oid, oid);
+ writer->pos_cache[slot].pos = pos | BITMAP_POS_CACHE_VALID;
+
+ return pos;
+}
+
static uint32_t find_object_pos(struct bitmap_writer *writer,
const struct object_id *oid, int *found)
{
struct object_entry *entry;
+ uint32_t pos;
+
+ bitmap_writer_init_pos_cache(writer);
+
+ if (find_cached_object_pos(writer, oid, &pos)) {
+ if (found)
+ *found = 1;
+ return pos;
+ }
entry = packlist_find(writer->to_pack, oid);
if (entry) {
uint32_t base_objects = 0;
+
if (writer->midx)
base_objects = writer->midx->num_objects +
writer->midx->num_objects_in_base;
@@ -238,7 +317,7 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
if (found)
*found = 1;
- return pos;
+ return store_cached_object_pos(writer, oid, pos);
missing:
if (found)
@@ -661,6 +740,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
writer->progress = start_progress(writer->repo,
"Building bitmaps",
writer->selected_nr);
+
+ writer->pos_cache_hits = 0;
+ writer->pos_cache_misses = 0;
+
trace2_region_enter("pack-bitmap-write", "building_bitmaps_total",
writer->repo);
@@ -725,6 +808,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
trace2_data_intmax("pack-bitmap-write", writer->repo,
"fill_bitmap_commit_found_ancestor_nr",
fill_bitmap_commit_found_ancestor_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "bitmap_pos_cache_hits", writer->pos_cache_hits);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "bitmap_pos_cache_misses", writer->pos_cache_misses);
stop_progress(&writer->progress);
diff --git a/pack-bitmap.h b/pack-bitmap.h
index a95e1c2d115..19a86554579 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -132,6 +132,8 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_i
off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *);
+struct bitmap_pos_cache_entry;
+
struct bitmap_writer {
struct repository *repo;
struct ewah_bitmap *commits;
@@ -143,6 +145,11 @@ struct bitmap_writer {
struct packing_data *to_pack;
struct multi_pack_index *midx; /* if appending to a MIDX chain */
+ struct bitmap_pos_cache_entry *pos_cache;
+ size_t pos_cache_nr;
+ uint64_t pos_cache_hits;
+ uint64_t pos_cache_misses;
+
struct bitmapped_commit *selected;
unsigned int selected_nr, selected_alloc;
--
2.54.0.rc1.84.g30ce254312c
^ permalink raw reply related [flat|nested] 34+ messages in thread* Re: [PATCH 5/8] pack-bitmap: cache object positions during fill
2026-05-19 16:12 ` [PATCH 5/8] pack-bitmap: cache object positions during fill Taylor Blau
@ 2026-05-27 9:45 ` Jeff King
2026-05-27 14:46 ` Taylor Blau
0 siblings, 1 reply; 34+ messages in thread
From: Jeff King @ 2026-05-27 9:45 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Tue, May 19, 2026 at 12:12:47PM -0400, Taylor Blau wrote:
> The previous commits removed some redundant work from bitmap generation
> by avoiding unnecessary tree recursion and by reusing selected bitmaps
> that have already been computed.
>
> Even with those changes in place, there is still an extremely hot path
> from `fill_bitmap_commit()` and `fill_bitmap_tree()` to translate object
> IDs into their corresponding bit positions in order to generate their
> bitmaps.
>
> In a small repository, this overhead is not significant. However, in a
> very large repository (e.g., the one that we have been using as a
> benchmark over the past several commits with ~57M total objects), the
> overhead of locating object bit positions (often repeatedly) adds up
> significantly.
>
> Combat this by adding a small, direct-mapped cache to the bitmap writer
> which maps object IDs to their corresponding bit positions. Size the
> cache according to the number of objects being written, with fixed lower
> and upper bounds so small repositories do not pay for a large table and
> large repositories can avoid most repeated packlist and MIDX lookups.
Introducing another layer of data structure feels so dirty, but it's
hard to argue with the numbers. We are looking up oids in the packlist,
so it's already O(lg n). Your cache here is essentially a hash lookup,
which is O(1)-ish (with collisions causing eviction rather than growth).
And it presumably works because there's a lot of locality in lookups
(between commits X and X^1, their top-level trees will be almost
identical but we have to resolve the bits to find out which entries are
new).
It does make me wonder if we'd see similar improvements if we just
turned the packlist into a regular hash table. Or maybe not, because
then we'd have to do actual probing.
It also makes me wonder if we could use this trick elsewhere, but I
guess we usually are using "struct object" itself to find repeats in
most graph traversals. And there we're using a hash table already. So
this might save us a tiny bit of probing, but not much else.
Likewise when comparing two trees directly, we can just walk them in
parallel to find the changed parts (which doesn't work here, because
we're comparing one tree to the bitmap of all ancestors, not just X^1).
So this really is a somewhat unique situation. It _might_ be applicable
for the reading side of bitmaps, though. When we do fill-in traversal we
end up with this same "read a tree, find the bit for each entry, and 99%
of the time find that it is already in the bitmap".
> On my machine with (a somewhat outdated) GCC 15.2.0, each entry in the
> cache is 40 bytes wide:
>
> $ pahole -C bitmap_pos_cache_entry pack-bitmap-write.o
> struct bitmap_pos_cache_entry {
> struct object_id oid; /* 0 36 */
> uint32_t pos; /* 36 4 */
>
> /* size: 40, cachelines: 1, members: 2 */
> /* last cacheline: 40 bytes */
> };
I wondered about storing a pointer to an oid here, which would be
smaller but require an extra level of pointer chasing. The ones from
object structs are stable, but I guess the ones from trees are not (they
point to an entry field which will be reused). So we have to store the
oid whole.
> In our example repository from above and in earlier commits, this
> results in a ~9.4% reduction in runtime relative to the previous commit:
>
> +------------------+-------------+-------------+---------------------+
> | | HEAD^ | HEAD | Delta |
> +------------------+-------------+-------------+---------------------+
> | elapsed | 324.8 s | 294.1 s | -30.7 s (-9.4%) |
> | cycles | 1,508.6 B | 1,365.5 B | -143.0 B (-9.5%) |
> | instructions | 1,436.6 B | 1,389.8 B | -46.9 B (-3.3%) |
> | CPI | 1.050 | 0.983 | -0.068 (-6.4%) |
> +------------------+-------------+-------------+---------------------+
I show a 26% speed up on linux.git (1m37 down to 1m12). Very cool.
> +static uint32_t store_cached_object_pos(struct bitmap_writer *writer,
> + const struct object_id *oid,
> + uint32_t pos)
> +{
> + size_t slot;
> +
> + if (pos & BITMAP_POS_CACHE_VALID)
> + return pos; /* too large to cache */
Cute, I wondered what would happen if we went past 2^31. I suspect there
are other parts of the code that do not behave that well around that
size, but it is good that we are not introducing any new surprises.
The whole patch looked pretty cleanly done.
-Peff
^ permalink raw reply [flat|nested] 34+ messages in thread* Re: [PATCH 5/8] pack-bitmap: cache object positions during fill
2026-05-27 9:45 ` Jeff King
@ 2026-05-27 14:46 ` Taylor Blau
0 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 14:46 UTC (permalink / raw)
To: Jeff King; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Wed, May 27, 2026 at 05:45:35AM -0400, Jeff King wrote:
> > Combat this by adding a small, direct-mapped cache to the bitmap writer
> > which maps object IDs to their corresponding bit positions. Size the
> > cache according to the number of objects being written, with fixed lower
> > and upper bounds so small repositories do not pay for a large table and
> > large repositories can avoid most repeated packlist and MIDX lookups.
>
> Introducing another layer of data structure feels so dirty, but it's
> hard to argue with the numbers. We are looking up oids in the packlist,
> so it's already O(lg n). Your cache here is essentially a hash lookup,
> which is O(1)-ish (with collisions causing eviction rather than growth).
> And it presumably works because there's a lot of locality in lookups
> (between commits X and X^1, their top-level trees will be almost
> identical but we have to resolve the bits to find out which entries are
> new).
>
> It does make me wonder if we'd see similar improvements if we just
> turned the packlist into a regular hash table. Or maybe not, because
> then we'd have to do actual probing.
I haven't run that experiment directly, but I share your suspicion. I
wrote a 2- and 4-way associative cache implementation as alternatives
before settling on the direct-mapped approach in this patch. I found
that associative caches regardless of cache lines nearly nuked any of
the performance gains that we got as a result of this patch.
> So this really is a somewhat unique situation. It _might_ be applicable
> for the reading side of bitmaps, though. When we do fill-in traversal we
> end up with this same "read a tree, find the bit for each entry, and 99%
> of the time find that it is already in the bitmap".
I think it's certainly likely. In my experience, many object to
bit-position queries are extremely cache-friendly. And in practice, many
large repositories have MIDXs with many tens of millions of objects. So
even on a O(log n) lookup, caching seems to help a lot.
> > In our example repository from above and in earlier commits, this
> > results in a ~9.4% reduction in runtime relative to the previous commit:
> >
> > +------------------+-------------+-------------+---------------------+
> > | | HEAD^ | HEAD | Delta |
> > +------------------+-------------+-------------+---------------------+
> > | elapsed | 324.8 s | 294.1 s | -30.7 s (-9.4%) |
> > | cycles | 1,508.6 B | 1,365.5 B | -143.0 B (-9.5%) |
> > | instructions | 1,436.6 B | 1,389.8 B | -46.9 B (-3.3%) |
> > | CPI | 1.050 | 0.983 | -0.068 (-6.4%) |
> > +------------------+-------------+-------------+---------------------+
>
> I show a 26% speed up on linux.git (1m37 down to 1m12). Very cool.
Glad it reproduces ;-).
> > +static uint32_t store_cached_object_pos(struct bitmap_writer *writer,
> > + const struct object_id *oid,
> > + uint32_t pos)
> > +{
> > + size_t slot;
> > +
> > + if (pos & BITMAP_POS_CACHE_VALID)
> > + return pos; /* too large to cache */
>
> Cute, I wondered what would happen if we went past 2^31. I suspect there
> are other parts of the code that do not behave that well around that
> size, but it is good that we are not introducing any new surprises.
Yeah, I suspect that there are many breakages past the 32-bit unsigned
maximum number of objects. I figure the easiest thing to do in this
patch is to avoid making that situation worse by simply not caching
objects that are near that limit.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 34+ messages in thread
* [PATCH 6/8] pack-bitmap: sort bitmaps before XORing
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
` (4 preceding siblings ...)
2026-05-19 16:12 ` [PATCH 5/8] pack-bitmap: cache object positions during fill Taylor Blau
@ 2026-05-19 16:12 ` Taylor Blau
2026-05-27 10:04 ` Jeff King
2026-05-19 16:12 ` [PATCH 7/8] pack-bitmap: remember pseudo-merge parents Taylor Blau
` (3 subsequent siblings)
9 siblings, 1 reply; 34+ messages in thread
From: Taylor Blau @ 2026-05-19 16:12 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
Reachability bitmaps may be stored as XORs against nearby bitmaps, up to
10 away. However, when callers provide selected commits in an arbitrary
order, the writer may miss good ancestor/descendant pairs and produce
much larger bitmap files without changing query coverage.
Sort the selected bitmaps in date order (from oldest to newest) before
computing XOR offsets, leaving pseudo-merge bitmaps alone (which we will
deal with separately in following commits).
On our same testing repository from previous commits, this change shrunk
our selection of 1,261 bitmaps from ~635.46 MiB to 176.4 MiB for a
~72.24% reduction in the on-disk size of our *.bitmap file. The time to
generate the smaller bitmap file decreased by ~3.69 seconds, though this
is likely mostly noise.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 4b6fb07edd7..66282ea14b5 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -327,11 +327,40 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
return 0;
}
+static int bitmapped_commit_date_cmp(const void *_a, const void *_b)
+{
+ const struct bitmapped_commit *a = _a;
+ const struct bitmapped_commit *b = _b;
+
+ if (a->commit->date < b->commit->date)
+ return -1;
+ if (a->commit->date > b->commit->date)
+ return 1;
+ return 0;
+}
+
static void compute_xor_offsets(struct bitmap_writer *writer)
{
static const int MAX_XOR_OFFSET_SEARCH = 10;
int i, next = 0;
+ int nr = bitmap_writer_nr_selected_commits(writer);
+
+ if (nr > 1) {
+ QSORT(writer->selected, nr, bitmapped_commit_date_cmp);
+
+ for (i = 0; i < nr; i++) {
+ struct bitmapped_commit *stored = &writer->selected[i];
+ khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
+ stored->commit->object.oid);
+
+ if (hash_pos == kh_end(writer->bitmaps))
+ BUG("selected commit missing from bitmap map: %s",
+ oid_to_hex(&stored->commit->object.oid));
+
+ kh_value(writer->bitmaps, hash_pos) = stored;
+ }
+ }
while (next < writer->selected_nr) {
struct bitmapped_commit *stored = &writer->selected[next];
--
2.54.0.rc1.84.g30ce254312c
^ permalink raw reply related [flat|nested] 34+ messages in thread* Re: [PATCH 6/8] pack-bitmap: sort bitmaps before XORing
2026-05-19 16:12 ` [PATCH 6/8] pack-bitmap: sort bitmaps before XORing Taylor Blau
@ 2026-05-27 10:04 ` Jeff King
2026-05-27 16:56 ` Taylor Blau
0 siblings, 1 reply; 34+ messages in thread
From: Jeff King @ 2026-05-27 10:04 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Tue, May 19, 2026 at 12:12:50PM -0400, Taylor Blau wrote:
> Reachability bitmaps may be stored as XORs against nearby bitmaps, up to
> 10 away. However, when callers provide selected commits in an arbitrary
> order, the writer may miss good ancestor/descendant pairs and produce
> much larger bitmap files without changing query coverage.
>
> Sort the selected bitmaps in date order (from oldest to newest) before
> computing XOR offsets, leaving pseudo-merge bitmaps alone (which we will
> deal with separately in following commits).
That order certainly makes the most sense. I'd have thought we ended up
there incidentally because of the order in which we consider the
commits, but perhaps not. I wonder if this got much worse when we
re-wrote the bitmap generation code a few years ago.
That was in v2.31.0, I think. Repacking linux.git with bitmaps, though,
I couldn't find any difference in size between v2.30 and v2.31. They're
both ~67M. But that also didn't shrink with this patch, either.
If you have some spare CPU cycles to burn, I would be interested in a
comparison of the bitmap size of your test repo using v2.30.0, v2.31.1,
and this patch.
> On our same testing repository from previous commits, this change shrunk
> our selection of 1,261 bitmaps from ~635.46 MiB to 176.4 MiB for a
> ~72.24% reduction in the on-disk size of our *.bitmap file. The time to
> generate the smaller bitmap file decreased by ~3.69 seconds, though this
> is likely mostly noise.
Certainly good numbers. The obvious follow-up question is: how does the
reading side fare? I'd expect it to be a little better, if only because
there are fewer bytes to consider when XOR-ing. But if there's some
hidden assumption we're missing, then it could get wildly worse. It
would be good to confirm that that didn't happen. ;)
> static void compute_xor_offsets(struct bitmap_writer *writer)
> {
> static const int MAX_XOR_OFFSET_SEARCH = 10;
>
> int i, next = 0;
> + int nr = bitmap_writer_nr_selected_commits(writer);
> +
> + if (nr > 1) {
> + QSORT(writer->selected, nr, bitmapped_commit_date_cmp);
> +
> + for (i = 0; i < nr; i++) {
> + struct bitmapped_commit *stored = &writer->selected[i];
> + khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
> + stored->commit->object.oid);
> +
> + if (hash_pos == kh_end(writer->bitmaps))
> + BUG("selected commit missing from bitmap map: %s",
> + oid_to_hex(&stored->commit->object.oid));
> +
> + kh_value(writer->bitmaps, hash_pos) = stored;
> + }
> + }
OK. It took me a minute to wrap my head around this. The real work is
done by QSORT(). But because we maintain a hash pointing into that
array, we have to go through each hash entry and fix up its pointer.
Looks correct.
-Peff
^ permalink raw reply [flat|nested] 34+ messages in thread* Re: [PATCH 6/8] pack-bitmap: sort bitmaps before XORing
2026-05-27 10:04 ` Jeff King
@ 2026-05-27 16:56 ` Taylor Blau
0 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 16:56 UTC (permalink / raw)
To: Jeff King; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Wed, May 27, 2026 at 06:04:06AM -0400, Jeff King wrote:
> On Tue, May 19, 2026 at 12:12:50PM -0400, Taylor Blau wrote:
>
> > Reachability bitmaps may be stored as XORs against nearby bitmaps, up to
> > 10 away. However, when callers provide selected commits in an arbitrary
> > order, the writer may miss good ancestor/descendant pairs and produce
> > much larger bitmap files without changing query coverage.
> >
> > Sort the selected bitmaps in date order (from oldest to newest) before
> > computing XOR offsets, leaving pseudo-merge bitmaps alone (which we will
> > deal with separately in following commits).
>
> That order certainly makes the most sense. I'd have thought we ended up
> there incidentally because of the order in which we consider the
> commits, but perhaps not. I wonder if this got much worse when we
> re-wrote the bitmap generation code a few years ago.
>
> That was in v2.31.0, I think. Repacking linux.git with bitmaps, though,
> I couldn't find any difference in size between v2.30 and v2.31. They're
> both ~67M. But that also didn't shrink with this patch, either.
>
> If you have some spare CPU cycles to burn, I would be interested in a
> comparison of the bitmap size of your test repo using v2.30.0, v2.31.1,
> and this patch.
I started running this experiment, but I don't think I actually have
enough CPU cycles to let it finish ;-). Pre-v2.31 bitmap generation is
*really* slow[^1], and after multiple hours (forcing the same selection
of bitmaps by back-porting and adjusting 'test-tool bitmap') I couldn't
seem to make any meaningful progress.
I'm sure that you could get some plausible numbers out of benchmarking
this on a smaller repository. In case you're interested, here's the
patch I wrote on top of v2.30.0:
--- 8< ---
diff --git a/Makefile b/Makefile
index 7b64106930a..9ce9f2b483c 100644
--- a/Makefile
+++ b/Makefile
@@ -690,6 +690,7 @@ X =
PROGRAMS += $(patsubst %.o,git-%$X,$(PROGRAM_OBJS))
TEST_BUILTINS_OBJS += test-advise.o
+TEST_BUILTINS_OBJS += test-bitmap.o
TEST_BUILTINS_OBJS += test-bloom.o
TEST_BUILTINS_OBJS += test-chmtime.o
TEST_BUILTINS_OBJS += test-config.o
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 5e998bdaa79..55dbb475120 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -113,7 +113,7 @@ void bitmap_writer_build_type_index(struct packing_data *to_pack,
static struct object **seen_objects;
static unsigned int seen_objects_nr, seen_objects_alloc;
-static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitmap *reused)
+void bitmap_writer_push_commit(struct commit *commit, struct ewah_bitmap *reused)
{
if (writer.selected_nr >= writer.selected_alloc) {
writer.selected_alloc = (writer.selected_alloc + 32) * 2;
@@ -402,7 +402,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
if (indexed_commits_nr < 100) {
for (i = 0; i < indexed_commits_nr; ++i)
- push_bitmapped_commit(indexed_commits[i], NULL);
+ bitmap_writer_push_commit(indexed_commits[i], NULL);
return;
}
@@ -440,7 +440,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits,
}
}
- push_bitmapped_commit(chosen, reused_bitmap);
+ bitmap_writer_push_commit(chosen, reused_bitmap);
i += next + 1;
display_progress(writer.progress, i);
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 1203120c432..a882efdb16a 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -74,6 +74,8 @@ void bitmap_writer_build_type_index(struct packing_data *to_pack,
struct pack_idx_entry **index,
uint32_t index_nr);
void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack);
+void bitmap_writer_push_commit(struct commit *commit,
+ struct ewah_bitmap *reused);
void bitmap_writer_select_commits(struct commit **indexed_commits,
unsigned int indexed_commits_nr, int max_bitmaps);
void bitmap_writer_build(struct packing_data *to_pack);
diff --git a/t/helper/test-bitmap.c b/t/helper/test-bitmap.c
new file mode 100644
index 00000000000..9be03377c06
--- /dev/null
+++ b/t/helper/test-bitmap.c
@@ -0,0 +1,119 @@
+#define USE_THE_REPOSITORY_VARIABLE
+
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "commit.h"
+#include "packfile.h"
+#include "pack-bitmap.h"
+
+static int add_packed_object(const struct object_id *oid,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *_data)
+{
+ struct packing_data *packed = _data;
+ struct object_entry *entry;
+ struct object_info oi = OBJECT_INFO_INIT;
+ enum object_type type;
+
+ oi.typep = &type;
+
+ entry = packlist_alloc(packed, oid);
+ entry->in_pack_offset = nth_packed_object_offset(pack, pos);
+ entry->idx.offset = entry->in_pack_offset;
+ if (packed_object_info(the_repository, pack, entry->in_pack_offset, &oi) < 0)
+ die("could not get type of object %s",
+ oid_to_hex(oid));
+ oe_set_type(entry, type);
+ oe_set_in_pack(packed, entry, pack);
+
+ return 0;
+}
+
+static int idx_oid_cmp(const void *va, const void *vb)
+{
+ const struct pack_idx_entry *a = *(const struct pack_idx_entry **)va;
+ const struct pack_idx_entry *b = *(const struct pack_idx_entry **)vb;
+
+ return oidcmp(&a->oid, &b->oid);
+}
+
+static int bitmap_write(const char *basename)
+{
+ struct packed_git *p = NULL;
+ struct packing_data packed = { 0 };
+ struct pack_idx_entry **index;
+ struct strbuf buf = STRBUF_INIT;
+ uint32_t i;
+
+ prepare_repo_settings(the_repository);
+ for (p = get_all_packs(the_repository); p; p = p->next) {
+ if (!strcmp(pack_basename(p), basename))
+ break;
+ }
+
+ if (!p)
+ die("could not find pack '%s'", basename);
+
+ if (open_pack_index(p))
+ die("cannot open pack index for '%s'", p->pack_name);
+
+ prepare_packing_data(the_repository, &packed);
+
+ for_each_object_in_pack(p, add_packed_object, &packed,
+ FOR_EACH_OBJECT_PACK_ORDER);
+
+ /*
+ * Build the index array now that data.packed.objects[] is
+ * fully allocated (packlist_alloc() may have reallocated it
+ * during the loop above).
+ */
+ ALLOC_ARRAY(index, p->num_objects);
+ for (i = 0; i < p->num_objects; i++)
+ index[i] = &packed.objects[i].idx;
+
+ bitmap_writer_build_type_index(&packed, index, p->num_objects);
+
+ while (strbuf_getline_lf(&buf, stdin) != EOF) {
+ struct object_id oid;
+ struct commit *c;
+
+ if (get_oid_hex(buf.buf, &oid))
+ die("invalid OID: %s", buf.buf);
+
+ c = lookup_commit(the_repository, &oid);
+ if (!c || repo_parse_commit(the_repository, c))
+ die("could not parse commit %s", buf.buf);
+
+ bitmap_writer_push_commit(c, NULL);
+ }
+
+ bitmap_writer_build(&packed);
+
+ bitmap_writer_set_checksum(p->hash);
+
+ QSORT(index, p->num_objects, idx_oid_cmp);
+
+ strbuf_reset(&buf);
+ strbuf_addstr(&buf, p->pack_name);
+ strbuf_strip_suffix(&buf, ".pack");
+ strbuf_addstr(&buf, ".bitmap");
+ bitmap_writer_finish(index, p->num_objects, buf.buf, 0);
+
+ strbuf_release(&buf);
+ free(index);
+
+ return 0;
+}
+
+int cmd__bitmap(int argc, const char **argv)
+{
+ setup_git_directory();
+
+ if (argc == 3 && !strcmp(argv[1], "write"))
+ return bitmap_write(argv[2]);
+
+ usage("\ttest-tool bitmap write <pack-basename> < <commit-list>");
+
+ return -1;
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 9d6d14d9293..c43d8c0977b 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -15,6 +15,7 @@ struct test_cmd {
static struct test_cmd cmds[] = {
{ "advise", cmd__advise_if_enabled },
+ { "bitmap", cmd__bitmap },
{ "bloom", cmd__bloom },
{ "chmtime", cmd__chmtime },
{ "config", cmd__config },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index a6470ff62c4..27e6e40ffcb 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -5,6 +5,7 @@
#include "git-compat-util.h"
int cmd__advise_if_enabled(int argc, const char **argv);
+int cmd__bitmap(int argc, const char **argv);
int cmd__bloom(int argc, const char **argv);
int cmd__chmtime(int argc, const char **argv);
int cmd__config(int argc, const char **argv);
--- >8 ---
> > On our same testing repository from previous commits, this change shrunk
> > our selection of 1,261 bitmaps from ~635.46 MiB to 176.4 MiB for a
> > ~72.24% reduction in the on-disk size of our *.bitmap file. The time to
> > generate the smaller bitmap file decreased by ~3.69 seconds, though this
> > is likely mostly noise.
>
> Certainly good numbers. The obvious follow-up question is: how does the
> reading side fare? I'd expect it to be a little better, if only because
> there are fewer bytes to consider when XOR-ing. But if there's some
> hidden assumption we're missing, then it could get wildly worse. It
> would be good to confirm that that didn't happen. ;)
It doesn't make a huge difference. Prior to this patch, the timings on
my test repository for 'git rev-list --count --all --objects
--use-bitmap-index' go from:
- 2.180 ± 0.019 seconds (with pseudo-merges)
- 52.149 ± 0.224 seconds (without pseudo-merges)
, and after applying this patch, it changes to:
- 2.611 ± 0.023 seconds (with pseudo-merges)
- 51.963 ± 0.131 seconds (without pseudo-merges)
It looks like there is a minor slow-down on pseudo-merges, and a minor
speed-up without them. The difference is small enough that I'm willing
to treat it as run-to-run noise.
> > static void compute_xor_offsets(struct bitmap_writer *writer)
> > {
> > static const int MAX_XOR_OFFSET_SEARCH = 10;
> >
> > int i, next = 0;
> > + int nr = bitmap_writer_nr_selected_commits(writer);
> > +
> > + if (nr > 1) {
> > + QSORT(writer->selected, nr, bitmapped_commit_date_cmp);
> > +
> > + for (i = 0; i < nr; i++) {
> > + struct bitmapped_commit *stored = &writer->selected[i];
> > + khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
> > + stored->commit->object.oid);
> > +
> > + if (hash_pos == kh_end(writer->bitmaps))
> > + BUG("selected commit missing from bitmap map: %s",
> > + oid_to_hex(&stored->commit->object.oid));
> > +
> > + kh_value(writer->bitmaps, hash_pos) = stored;
> > + }
> > + }
>
> OK. It took me a minute to wrap my head around this. The real work is
> done by QSORT(). But because we maintain a hash pointing into that
> array, we have to go through each hash entry and fix up its pointer.
Yup.
> Looks correct.
Thanks,
Taylor
[^1]: ...and I have great empathy for Stolee's suffering here when
benchmarking his performance improvements to the bitmap generation
code from back in the day! ;-).
^ permalink raw reply related [flat|nested] 34+ messages in thread
* [PATCH 7/8] pack-bitmap: remember pseudo-merge parents
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
` (5 preceding siblings ...)
2026-05-19 16:12 ` [PATCH 6/8] pack-bitmap: sort bitmaps before XORing Taylor Blau
@ 2026-05-19 16:12 ` Taylor Blau
2026-05-19 16:12 ` [PATCH 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps Taylor Blau
` (2 subsequent siblings)
9 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-19 16:12 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
write_pseudo_merges() currently builds an array of temporary bitmaps for
the parent set of each pseudo-merge, then serializes those bitmaps later
while writing the extension.
Move those parent bitmaps onto the corresponding bitmapped_commit
entries instead. This keeps the on-disk output unchanged, but gives the
parent bitmap the same lifetime and access pattern that later changes
will use when pseudo-merge object bitmaps are built before the write
step.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 66282ea14b5..8200aed6101 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -32,6 +32,7 @@ struct bitmapped_commit {
struct commit *commit;
struct ewah_bitmap *bitmap;
struct ewah_bitmap *write_as;
+ struct ewah_bitmap *pseudo_merge_parents;
int flags;
int xor_offset;
uint32_t commit_pos;
@@ -102,6 +103,7 @@ void bitmap_writer_free(struct bitmap_writer *writer)
if (bc->write_as != bc->bitmap)
ewah_free(bc->write_as);
ewah_free(bc->bitmap);
+ ewah_free(bc->pseudo_merge_parents);
}
free(writer->selected);
}
@@ -210,6 +212,7 @@ void bitmap_writer_push_commit(struct bitmap_writer *writer,
writer->selected[writer->selected_nr].write_as = NULL;
writer->selected[writer->selected_nr].flags = 0;
writer->selected[writer->selected_nr].pseudo_merge = pseudo_merge;
+ writer->selected[writer->selected_nr].pseudo_merge_parents = NULL;
writer->selected_nr++;
}
@@ -1004,42 +1007,47 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
struct hashfile *f)
{
struct oid_array commits = OID_ARRAY_INIT;
- struct bitmap **commits_bitmap = NULL;
off_t *pseudo_merge_ofs = NULL;
off_t start, table_start, next_ext;
uint32_t base = bitmap_writer_nr_selected_commits(writer);
size_t i, j = 0;
- CALLOC_ARRAY(commits_bitmap, writer->pseudo_merges_nr);
CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr);
for (i = 0; i < writer->pseudo_merges_nr; i++) {
struct bitmapped_commit *merge = &writer->selected[base + i];
struct commit_list *p;
+ struct bitmap *parents = bitmap_new();
if (!merge->pseudo_merge)
BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
- commits_bitmap[i] = bitmap_new();
-
for (p = merge->commit->parents; p; p = p->next)
- bitmap_set(commits_bitmap[i],
+ bitmap_set(parents,
find_object_pos(writer, &p->item->object.oid,
NULL));
+
+ merge->pseudo_merge_parents = bitmap_to_ewah(parents);
+ bitmap_free(parents);
}
start = hashfile_total(f);
for (i = 0; i < writer->pseudo_merges_nr; i++) {
- struct ewah_bitmap *commits_ewah = bitmap_to_ewah(commits_bitmap[i]);
+ struct bitmapped_commit *merge = &writer->selected[base + i];
+
+ if (!merge->pseudo_merge)
+ BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
+
+ if (!merge->pseudo_merge_parents)
+ BUG("missing pseudo-merge parents bitmap for commit %s",
+ oid_to_hex(&merge->commit->object.oid));
pseudo_merge_ofs[i] = hashfile_total(f);
- dump_bitmap(f, commits_ewah);
+ dump_bitmap(f, merge->pseudo_merge_parents);
dump_bitmap(f, writer->selected[base+i].write_as);
-
- ewah_free(commits_ewah);
}
next_ext = st_add(hashfile_total(f),
@@ -1122,12 +1130,8 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
hashwrite_be64(f, table_start - start);
hashwrite_be64(f, hashfile_total(f) - start + sizeof(uint64_t));
- for (i = 0; i < writer->pseudo_merges_nr; i++)
- bitmap_free(commits_bitmap[i]);
-
oid_array_clear(&commits);
free(pseudo_merge_ofs);
- free(commits_bitmap);
}
static int table_cmp(const void *_va, const void *_vb, void *_data)
--
2.54.0.rc1.84.g30ce254312c
^ permalink raw reply related [flat|nested] 34+ messages in thread* [PATCH 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
` (6 preceding siblings ...)
2026-05-19 16:12 ` [PATCH 7/8] pack-bitmap: remember pseudo-merge parents Taylor Blau
@ 2026-05-19 16:12 ` Taylor Blau
2026-05-27 10:25 ` Jeff King
2026-05-27 10:27 ` [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Jeff King
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
9 siblings, 1 reply; 34+ messages in thread
From: Taylor Blau @ 2026-05-19 16:12 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
When generating bitmaps, `bitmap_builder_init()` starts with an initial
selection of commits to receive bitmap coverage, and then determines a
set of "maximal" commits based on its input.
Commit 089f751360f (pack-bitmap-write: build fewer intermediate bitmaps,
2020-12-08) has extensive details, but the gist is as follows:
Each selected commit starts with one commit_mask bit in its "commit
mask" bitmap. Then, we walk the first-parent history in topological
order and OR each commit's mask into its (first) parent. Whenever that
OR results in the parent having more bits set, the child is deemed to be
non-maximal, and the frontier is pushed further back along the first
parent history.
That approach works extremely well for ordinary selected commits, whose
first-parent histories often describe real sharing between the bitmaps
we are going to write.
It struggles, however, to efficiently generate pseudo-merge bitmaps.
Unlike ordinary commits for which the above algorithm is designed,
pseudo-merges don't represent any "real" commit in history, just a
grouping of non-bitmapped reference tips. In that sense, their first
parent is just a part of a larger set, and treating them like ordinary
selected commits imposes a significant slow-down when generating bitmaps
with pseudo-merges enabled.
Consider partitioning all non-bitmapped reference tips into eight
individual pseudo-merges via the following configuration:
[bitmapPseudoMerge "all"]
pattern=refs/
threshold=now
stableSize=10000000
maxMerges=8
, the cost of generating a bitmap from scratch rises significantly:
+------------------+-----------------+---------------+---------------------+
| | no pseudo-merge | pseudo-merges | Delta |
| | | (HEAD^) | |
+------------------+-----------------+---------------+---------------------+
| elapsed | 294.1 s | 575.0 s | +280.9 s (+95.5%) |
| cycles | 1,365.5 B | 2,686.9 B | +1,321.4 B (+96.8%) |
| instructions | 1,389.8 B | 2,546.6 B | +1,156.8 B (+83.2%) |
| CPI | 0.983 | 1.055 | +0.073 (+7.4%) |
+------------------+-----------------+---------------+---------------------+
This is a particularly poor trade-off, because the time saved by these
pseudo-merges during, e.g.,
$ git rev-list --count --all --objects --use-bitmap-index
is only:
$ hyperfine -L v true,false -n 'pseudo-merges: {v}' '
GIT_TEST_USE_PSEUDO_MERGES={v} git.compile rev-list --count \
--objects --all --use-bitmap-index
'
Benchmark 1: pseudo-merges: true
Time (mean ± σ): 2.613 s ± 0.012 s [User: 2.308 s, System: 0.305 s]
Range (min … max): 2.594 s … 2.633 s 10 runs
Benchmark 2: pseudo-merges: false
Time (mean ± σ): 52.205 s ± 0.170 s [User: 51.500 s, System: 0.697 s]
Range (min … max): 51.956 s … 52.458 s 10 runs
Summary
pseudo-merges: true ran
19.98 ± 0.11 times faster than pseudo-merges: false
In other words, we pay a nearly ~5 minute penalty to generate
pseudo-merge bitmaps, but only save ~50 seconds during traversal.
The problem stems from injecting pseudo-merges into the bitmap builder
as if they were normal commits. The maximal commit selection algorithm
was simply not designed for that case, and performs predictably poorly.
The only reason we reused the maximal commit selection routine for
pseudo-merges alongside regular non-pseudo-merge commits is because we
represent them both as commit objects (where the pseudo-merge commits
just represent a made-up commit as opposed to one that actually exists
in a repository's object store).
Instead, build the regular selected commit bitmaps first, considering
only non-pseudo-merge commits in `bitmap_builder_init()`. Once those
bitmaps have been stored, build each pseudo-merge bitmap separately and
attach its parent and object bitmaps to the corresponding pseudo-merge
entry before writing the extension.
This keeps the regular bitmap build shaped like the no-pseudo-merge
case. The later pseudo-merge fill can still stop at stored selected
ancestor bitmaps, so it does not have to rewalk each pseudo-merge
closure from scratch.
When an existing bitmap has the same pseudo-merge parent set, reuse and
remap that whole pseudo-merge bitmap before falling back to
fill_bitmap_commit(). This preserves the benefit of stable pseudo-merges
while keeping the on-disk format and reader behavior unchanged.
As a result, the overhead cost for generating pseudo-merges in the above
configuration is much smaller:
+------------------+-----------------+---------------+-------------------+
| | no pseudo-merge | pseudo-merges | Delta |
| | | (HEAD) | |
+------------------+-----------------+---------------+-------------------+
| elapsed | 294.1 s | 328.4 s | +34.3 s (+11.7%) |
| cycles | 1,365.5 B | 1,529.3 B | +163.7 B (+12.0%) |
| instructions | 1,389.8 B | 1,552.8 B | +163.0 B (+11.7%) |
| CPI | 0.983 | 0.985 | +0.002 (+0.2%) |
+------------------+-----------------+---------------+-------------------+
Recall that at the start of this series, generating reachability bitmaps
took 612.5 seconds *without* pseudo-merges. With this commit, it is
still ~46.38% *faster* to generate reachability bitmaps *with*
pseudo-merges than it was to generate bitmaps wihtout them at the
beginning of this series.
The changes to implement this are mostly straightforward. We exclude
pseudo-merge commits from the existing bitmap generation, and walk over
them in a separate pass, by either reusing an existing on-disk
pseudo-merge, or passing the pseudo-merge commit itself back to the
existing routine in `fill_bitmap_commit()`.
(Note that the routine to build pseudo-merge bitmaps is the same both
before and after this change, the difference is only that we do not let
psuedo-merges participate in determining the set of maximal commits.)
The only wrinkle is that `fill_bitmap_commit()` must be taught to not
expect that all tree objects have been parsed, which is the case for any
portion of history reachable by one or more pseudo-merge(s), but not by
any non-pseudo-merge commit selected for bitmapping.
Now that we have decoupled how we generate pseudo-merges from their
representation, the following commits will improve the API around
specifying pseudo-merge groupings during bitmap generation.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 210 ++++++++++++++++++++++++++++++++++++--------
1 file changed, 174 insertions(+), 36 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 8200aed6101..1bcb3f98a42 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -446,13 +446,17 @@ static void bitmap_builder_init(struct bitmap_builder *bb,
revs.topo_order = 1;
revs.first_parent_only = 1;
- for (i = 0; i < writer->selected_nr; i++) {
+ for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) {
struct bitmapped_commit *bc = &writer->selected[i];
struct bb_commit *ent = bb_data_at(&bb->data, bc->commit);
+ if (bc->pseudo_merge)
+ BUG("unexpected pseudo-merge at %"PRIuMAX,
+ (uintmax_t)i);
+
ent->selected = 1;
ent->maximal = 1;
- ent->pseudo_merge = bc->pseudo_merge;
+ ent->pseudo_merge = 0;
ent->idx = i;
ent->commit_mask = bitmap_new();
@@ -618,6 +622,8 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
static int reused_bitmaps_nr;
static int reused_pseudo_merge_bitmaps_nr;
+static int pseudo_merge_bitmap_nr;
+static int pseudo_merge_bitmap_parents;
static int fill_bitmap_commit_calls_nr;
static int fill_bitmap_commit_found_ancestor_nr;
@@ -631,8 +637,12 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
const uint32_t *mapping)
{
int found;
+ int from_pseudo_merge = commit->object.flags & BITMAP_PSEUDO_MERGE;
uint32_t pos;
+ if (ent->pseudo_merge)
+ BUG("unexpected pseudo-merge commit in fill_bitmap_commit()");
+
fill_bitmap_commit_calls_nr++;
if (!ent->bitmap)
@@ -648,10 +658,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
struct ewah_bitmap *old;
struct bitmap *remapped = bitmap_new();
- if (commit->object.flags & BITMAP_PSEUDO_MERGE)
- old = pseudo_merge_bitmap_for_commit(old_bitmap, c);
- else
- old = bitmap_for_commit(old_bitmap, c);
+ old = bitmap_for_commit(old_bitmap, c);
/*
* If this commit has an old bitmap, then translate that
* bitmap and add its bits to this one. No need to walk
@@ -660,10 +667,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
if (old && !rebuild_bitmap(mapping, old, remapped)) {
bitmap_or(ent->bitmap, remapped);
bitmap_free(remapped);
- if (commit->object.flags & BITMAP_PSEUDO_MERGE)
- reused_pseudo_merge_bitmaps_nr++;
- else
- reused_bitmaps_nr++;
+ reused_bitmaps_nr++;
continue;
}
bitmap_free(remapped);
@@ -696,12 +700,32 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
* walk ensures we cover all parents.
*/
if (!(c->object.flags & BITMAP_PSEUDO_MERGE)) {
+ struct tree *tree;
+
+ if (from_pseudo_merge && !c->object.parsed) {
+ /*
+ * Commits reachable from selected
+ * non-pseudo-merges are already parsed
+ * by the regular bitmap build.
+ *
+ * However, pseudo-merge fills can also
+ * reach commits that were not covered
+ * there, so parse any such leftovers
+ * before reading their tree or parents.
+ */
+ if (repo_parse_commit(writer->repo, c))
+ return -1;
+ }
+
pos = find_object_pos(writer, &c->object.oid, &found);
if (!found)
return -1;
bitmap_set(ent->bitmap, pos);
- prio_queue_put(tree_queue,
- repo_get_commit_tree(writer->repo, c));
+
+ tree = repo_get_commit_tree(writer->repo, c);
+ if (!tree)
+ return -1;
+ prio_queue_put(tree_queue, tree);
}
for (p = c->parents; p; p = p->next) {
@@ -738,6 +762,137 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
return 0;
}
+static int reuse_pseudo_merge_bitmap(struct bitmap_index *old_bitmap,
+ const uint32_t *mapping,
+ struct commit *merge,
+ struct ewah_bitmap **out)
+{
+ struct ewah_bitmap *old;
+ struct bitmap *remapped;
+
+ if (!old_bitmap || !mapping)
+ return 0;
+
+ old = pseudo_merge_bitmap_for_commit(old_bitmap, merge);
+ if (!old)
+ return 0;
+
+ remapped = bitmap_new();
+ if (rebuild_bitmap(mapping, old, remapped) < 0) {
+ bitmap_free(remapped);
+ return 0;
+ }
+
+ *out = bitmap_to_ewah(remapped);
+ bitmap_free(remapped);
+ reused_pseudo_merge_bitmaps_nr++;
+ return 1;
+}
+
+static int build_pseudo_merge_bitmap(struct bitmap_writer *writer,
+ struct bitmap_index *old_bitmap,
+ const uint32_t *mapping,
+ struct commit *merge,
+ struct ewah_bitmap **out)
+{
+ struct bb_commit ent = { 0 };
+ struct prio_queue queue = { NULL };
+ struct prio_queue tree_queue = { NULL };
+ unsigned parents = commit_list_count(merge->parents);
+ int ret;
+
+ ent.bitmap = bitmap_new();
+
+ pseudo_merge_bitmap_nr++;
+ pseudo_merge_bitmap_parents += parents;
+
+ if (reuse_pseudo_merge_bitmap(old_bitmap, mapping, merge, out)) {
+ ret = 0;
+ goto done;
+ }
+
+ ret = fill_bitmap_commit(writer, &ent, merge, &queue, &tree_queue,
+ old_bitmap, mapping);
+
+ if (!ret)
+ *out = bitmap_to_ewah(ent.bitmap);
+
+done:
+ bitmap_free(ent.bitmap);
+ clear_prio_queue(&queue);
+ clear_prio_queue(&tree_queue);
+
+ return ret;
+}
+
+static int build_pseudo_merge_bitmaps(struct bitmap_writer *writer,
+ struct bitmap_index *old_bitmap,
+ const uint32_t *mapping,
+ int *nr_stored)
+{
+ size_t i = bitmap_writer_nr_selected_commits(writer);
+ int ret = 0;
+
+ if (!writer->pseudo_merges_nr)
+ return 0;
+
+ trace2_region_enter("pack-bitmap-write", "building_pseudo_merge_bitmaps",
+ writer->repo);
+
+ for (; i < writer->selected_nr; i++) {
+ struct bitmapped_commit *merge = &writer->selected[i];
+ struct commit_list *p;
+ struct bitmap *parents = bitmap_new();
+ struct ewah_bitmap *objects = NULL;
+
+ if (!merge->pseudo_merge)
+ BUG("found non-pseudo merge commit at %"PRIuMAX,
+ (uintmax_t)i);
+
+ for (p = merge->commit->parents; p; p = p->next) {
+ int found;
+ uint32_t pos = find_object_pos(writer,
+ &p->item->object.oid,
+ &found);
+ if (!found) {
+ bitmap_free(parents);
+ ret = -1;
+ goto done;
+ }
+ bitmap_set(parents, pos);
+ }
+
+ merge->pseudo_merge_parents = bitmap_to_ewah(parents);
+ bitmap_free(parents);
+
+ if (build_pseudo_merge_bitmap(writer, old_bitmap, mapping,
+ merge->commit, &objects) < 0) {
+ ret = -1;
+ goto done;
+ }
+ merge->bitmap = objects;
+
+ (*nr_stored)++;
+ display_progress(writer->progress, *nr_stored);
+ }
+
+done:
+ trace2_region_leave("pack-bitmap-write", "building_pseudo_merge_bitmaps",
+ writer->repo);
+
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "pseudo_merge_bitmap_nr",
+ pseudo_merge_bitmap_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "building_bitmaps_pseudo_merge_reused",
+ reused_pseudo_merge_bitmaps_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "pseudo_merge_bitmap_parents",
+ pseudo_merge_bitmap_parents);
+
+ return ret;
+}
+
static void store_selected(struct bitmap_writer *writer,
struct bb_commit *ent, struct commit *commit)
{
@@ -821,6 +976,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
bitmap_free(ent->bitmap);
ent->bitmap = NULL;
}
+ if (closed &&
+ build_pseudo_merge_bitmaps(writer, old_bitmap, mapping,
+ &nr_stored) < 0)
+ closed = 0;
clear_prio_queue(&queue);
clear_prio_queue(&tree_queue);
bitmap_builder_clear(&bb);
@@ -831,9 +990,6 @@ int bitmap_writer_build(struct bitmap_writer *writer)
writer->repo);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"building_bitmaps_reused", reused_bitmaps_nr);
- trace2_data_intmax("pack-bitmap-write", writer->repo,
- "building_bitmaps_pseudo_merge_reused",
- reused_pseudo_merge_bitmaps_nr);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"fill_bitmap_commit_calls_nr",
fill_bitmap_commit_calls_nr);
@@ -1015,23 +1171,6 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr);
- for (i = 0; i < writer->pseudo_merges_nr; i++) {
- struct bitmapped_commit *merge = &writer->selected[base + i];
- struct commit_list *p;
- struct bitmap *parents = bitmap_new();
-
- if (!merge->pseudo_merge)
- BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
-
- for (p = merge->commit->parents; p; p = p->next)
- bitmap_set(parents,
- find_object_pos(writer, &p->item->object.oid,
- NULL));
-
- merge->pseudo_merge_parents = bitmap_to_ewah(parents);
- bitmap_free(parents);
- }
-
start = hashfile_total(f);
for (i = 0; i < writer->pseudo_merges_nr; i++) {
@@ -1040,14 +1179,13 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
if (!merge->pseudo_merge)
BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
- if (!merge->pseudo_merge_parents)
- BUG("missing pseudo-merge parents bitmap for commit %s",
+ if (!merge->pseudo_merge_parents || !merge->bitmap)
+ BUG("missing pseudo-merge bitmap for commit %s",
oid_to_hex(&merge->commit->object.oid));
pseudo_merge_ofs[i] = hashfile_total(f);
-
dump_bitmap(f, merge->pseudo_merge_parents);
- dump_bitmap(f, writer->selected[base+i].write_as);
+ dump_bitmap(f, merge->bitmap);
}
next_ext = st_add(hashfile_total(f),
--
2.54.0.rc1.84.g30ce254312c
^ permalink raw reply related [flat|nested] 34+ messages in thread* Re: [PATCH 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
2026-05-19 16:12 ` [PATCH 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps Taylor Blau
@ 2026-05-27 10:25 ` Jeff King
2026-05-27 19:24 ` Taylor Blau
0 siblings, 1 reply; 34+ messages in thread
From: Jeff King @ 2026-05-27 10:25 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Tue, May 19, 2026 at 12:12:55PM -0400, Taylor Blau wrote:
> Each selected commit starts with one commit_mask bit in its "commit
> mask" bitmap. Then, we walk the first-parent history in topological
> order and OR each commit's mask into its (first) parent. Whenever that
> OR results in the parent having more bits set, the child is deemed to be
> non-maximal, and the frontier is pushed further back along the first
> parent history.
>
> That approach works extremely well for ordinary selected commits, whose
> first-parent histories often describe real sharing between the bitmaps
> we are going to write.
>
> It struggles, however, to efficiently generate pseudo-merge bitmaps.
> Unlike ordinary commits for which the above algorithm is designed,
> pseudo-merges don't represent any "real" commit in history, just a
> grouping of non-bitmapped reference tips. In that sense, their first
> parent is just a part of a larger set, and treating them like ordinary
> selected commits imposes a significant slow-down when generating bitmaps
> with pseudo-merges enabled.
This is a great explanation of the problem, and especially this:
> In other words, we pay a nearly ~5 minute penalty to generate
> pseudo-merge bitmaps, but only save ~50 seconds during traversal.
makes it clear that we're doing something sub-optimal. And it points us
in the right direction, since that traversal should be able to generate
the pseudo-merge bitmap we need in the first place! So that should be
our goal to work towards.
> Instead, build the regular selected commit bitmaps first, considering
> only non-pseudo-merge commits in `bitmap_builder_init()`. Once those
> bitmaps have been stored, build each pseudo-merge bitmap separately and
> attach its parent and object bitmaps to the corresponding pseudo-merge
> entry before writing the extension.
And then this solution follows naturally from the earlier explanations.
Good.
In some ways this goes back to the pre-v2.31 way of generating bitmaps,
which is to just traverse for each bitmap independently. But as you
note, the whole idea of pseudo-merge bitmaps is that they aren't
overlapping in any meaningful way. So doing one fill-in traversal per
pseudo-merge makes sense, and hopefully we hit enough real bitmaps that
it's not too costly.
> As a result, the overhead cost for generating pseudo-merges in the above
> configuration is much smaller:
>
> +------------------+-----------------+---------------+-------------------+
> | | no pseudo-merge | pseudo-merges | Delta |
> | | | (HEAD) | |
> +------------------+-----------------+---------------+-------------------+
> | elapsed | 294.1 s | 328.4 s | +34.3 s (+11.7%) |
> | cycles | 1,365.5 B | 1,529.3 B | +163.7 B (+12.0%) |
> | instructions | 1,389.8 B | 1,552.8 B | +163.0 B (+11.7%) |
> | CPI | 0.983 | 0.985 | +0.002 (+0.2%) |
> +------------------+-----------------+---------------+-------------------+
Nice. The time savings are going to depend on how many pseudo-merges we
generate, I think. And I'd guess that the numbers above come from making
one big pseudo-merge bitmap, per the config you showed earlier. But you
probably only want a handful of them in any repo, so hopefully it
doesn't scale _too_ badly.
> Recall that at the start of this series, generating reachability bitmaps
> took 612.5 seconds *without* pseudo-merges. With this commit, it is
> still ~46.38% *faster* to generate reachability bitmaps *with*
> pseudo-merges than it was to generate bitmaps wihtout them at the
> beginning of this series.
Sure, though 612.5 seconds is all in the distant past. We only care
about 294.1 seconds now. ;)
More seriously, I do think the interesting question here is how the time
scales for various pseudo-merge configurations. I don't know if we have
any real operational experience with them yet. The original idea is that
you might slice up the ref space into a few chunks. I'd guess that the
old code performed badly-ish overall, but the time did not grow all that
much as you increased the number of chunks. But with the new code, I
suspect that the cost grows more linearly with number of chunks. That's
just a guess, though.
The other thing we hope for with pseudo-merges is that the chunks are
selected such that most of the chunks don't change (because they are
composed of old, stable refs). So in subsequent bitmap generations, we
can either reuse them either verbatim or as a starting point (if there
were only additions). But all of that is going to be heuristic and
depend on your config, the changes the repo sees over time, and so on.
So I don't know if we'd really have good numbers on that.
> Now that we have decoupled how we generate pseudo-merges from their
> representation, the following commits will improve the API around
> specifying pseudo-merge groupings during bitmap generation.
I think we're at patch 8/8 here. I guess you have more to come
eventually, but for now this part is just misleading. ;)
> pack-bitmap-write.c | 210 ++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 174 insertions(+), 36 deletions(-)
The patch looks reasonable, though I'm not all that familiar with the
ins and outs of the pseudo-merge code. I'd trust the tests here more
than my review.
> @@ -696,12 +700,32 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
> * walk ensures we cover all parents.
> */
> if (!(c->object.flags & BITMAP_PSEUDO_MERGE)) {
> + struct tree *tree;
> +
> + if (from_pseudo_merge && !c->object.parsed) {
> + /*
> + * Commits reachable from selected
> + * non-pseudo-merges are already parsed
> + * by the regular bitmap build.
> + *
> + * However, pseudo-merge fills can also
> + * reach commits that were not covered
> + * there, so parse any such leftovers
> + * before reading their tree or parents.
> + */
> + if (repo_parse_commit(writer->repo, c))
> + return -1;
> + }
Makes sense. This should be a quick noop for the regular bitmap build,
since we'll have the parsed flag set. And it should even allow use of
the commit-graph if it's available.
-Peff
^ permalink raw reply [flat|nested] 34+ messages in thread* Re: [PATCH 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
2026-05-27 10:25 ` Jeff King
@ 2026-05-27 19:24 ` Taylor Blau
0 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:24 UTC (permalink / raw)
To: Jeff King; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Wed, May 27, 2026 at 06:25:34AM -0400, Jeff King wrote:
> > It struggles, however, to efficiently generate pseudo-merge bitmaps.
> > Unlike ordinary commits for which the above algorithm is designed,
> > pseudo-merges don't represent any "real" commit in history, just a
> > grouping of non-bitmapped reference tips. In that sense, their first
> > parent is just a part of a larger set, and treating them like ordinary
> > selected commits imposes a significant slow-down when generating bitmaps
> > with pseudo-merges enabled.
>
> This is a great explanation of the problem, and especially this:
>
> > In other words, we pay a nearly ~5 minute penalty to generate
> > pseudo-merge bitmaps, but only save ~50 seconds during traversal.
>
> makes it clear that we're doing something sub-optimal. And it points us
> in the right direction, since that traversal should be able to generate
> the pseudo-merge bitmap we need in the first place! So that should be
> our goal to work towards.
>
> > Instead, build the regular selected commit bitmaps first, considering
> > only non-pseudo-merge commits in `bitmap_builder_init()`. Once those
> > bitmaps have been stored, build each pseudo-merge bitmap separately and
> > attach its parent and object bitmaps to the corresponding pseudo-merge
> > entry before writing the extension.
>
> And then this solution follows naturally from the earlier explanations.
> Good.
Thanks. For as clear as this sounds now, finding this approach took me
longer than I'd like to admit. I'm satisfied, however, with the result.
> In some ways this goes back to the pre-v2.31 way of generating bitmaps,
> which is to just traverse for each bitmap independently. But as you
> note, the whole idea of pseudo-merge bitmaps is that they aren't
> overlapping in any meaningful way. So doing one fill-in traversal per
> pseudo-merge makes sense, and hopefully we hit enough real bitmaps that
> it's not too costly.
Exactly!
> > As a result, the overhead cost for generating pseudo-merges in the above
> > configuration is much smaller:
> >
> > +------------------+-----------------+---------------+-------------------+
> > | | no pseudo-merge | pseudo-merges | Delta |
> > | | | (HEAD) | |
> > +------------------+-----------------+---------------+-------------------+
> > | elapsed | 294.1 s | 328.4 s | +34.3 s (+11.7%) |
> > | cycles | 1,365.5 B | 1,529.3 B | +163.7 B (+12.0%) |
> > | instructions | 1,389.8 B | 1,552.8 B | +163.0 B (+11.7%) |
> > | CPI | 0.983 | 0.985 | +0.002 (+0.2%) |
> > +------------------+-----------------+---------------+-------------------+
>
> Nice. The time savings are going to depend on how many pseudo-merges we
> generate, I think. And I'd guess that the numbers above come from making
> one big pseudo-merge bitmap, per the config you showed earlier. But you
> probably only want a handful of them in any repo, so hopefully it
> doesn't scale _too_ badly.
That's right, though see below for more thoughts on scaling...
> > Recall that at the start of this series, generating reachability bitmaps
> > took 612.5 seconds *without* pseudo-merges. With this commit, it is
> > still ~46.38% *faster* to generate reachability bitmaps *with*
> > pseudo-merges than it was to generate bitmaps wihtout them at the
> > beginning of this series.
>
> Sure, though 612.5 seconds is all in the distant past. We only care
> about 294.1 seconds now. ;)
Heh ;-). Naturally, I agree here, but wanted to include it for context.
I wanted to point out that the accumulated changes in this series make
it cheaper to generate bitmaps with pseudo-merges now than it was to
generate bitmaps without them before.
> More seriously, I do think the interesting question here is how the time
> scales for various pseudo-merge configurations. I don't know if we have
> any real operational experience with them yet. The original idea is that
> you might slice up the ref space into a few chunks. I'd guess that the
> old code performed badly-ish overall, but the time did not grow all that
> much as you increased the number of chunks. But with the new code, I
> suspect that the cost grows more linearly with number of chunks. That's
> just a guess, though.
I'm not aware of any large-scale deployments of pseudo-merge bitmaps.
This series is written (in part) of the hopes of making one ;-). I think
your intuition on the old code matches my own.
Below are some numbers that give you a sense of how the runtime scales
with the number of pseudo-merges. I'm relying exclusively on "stable"
pseudo-merges here since they have more predictable bucketing behavior,
though note that there isn't an exact way to dial in the number of these
so-called "stable" pseudo-merge groups. We can only control their *size*
(in terms of number of parents), so I ran the harness which produced the
above code with powers of 10 between [10^3, 10^6].
Results are as follows:
+------------+-------+----------+
| stableSize | count | time (s) |
+------------+-------+----------+
| 1000000 | 1 | 34.963 |
| 100000 | 3 | 36.954 |
| 10000 | 26 | 221.963 |
| 1000 | 252 | 2779.373 |
+------------+-------+----------+
Which scales roughly like O(x^1.165) (the best fit function I could find
was t(n) = 25.18 + 4.386 * n^1.165, where 'n' is the number of
pseudo-merges, and t(n) is the time it took to generate them).
So it does grow faster than linearly, but it's not too bad. The jump
from 26 to 252 pseudo-merges is pretty significant, though, but having
that many pseudo-merges is probably not something that we would want to
do in practice.
> The other thing we hope for with pseudo-merges is that the chunks are
> selected such that most of the chunks don't change (because they are
> composed of old, stable refs). So in subsequent bitmap generations, we
> can either reuse them either verbatim or as a starting point (if there
> were only additions). But all of that is going to be heuristic and
> depend on your config, the changes the repo sees over time, and so on.
>
> So I don't know if we'd really have good numbers on that.
We don't, and it is somewhat of a pain to simulate. I think the proof
will be in the pudding, so to speak.
> > Now that we have decoupled how we generate pseudo-merges from their
> > representation, the following commits will improve the API around
> > specifying pseudo-merge groupings during bitmap generation.
>
> I think we're at patch 8/8 here. I guess you have more to come
> eventually, but for now this part is just misleading. ;)
Yeah, I cleaved this off of a larger series to make the pseudo-merge API
a little easier to reason about and less clunky to use. But I ended up
hoarding some of those patches, and apparently forgot to adjust the
message here. Thanks for spotting.
Thanks,
Taylor
^ permalink raw reply [flat|nested] 34+ messages in thread
* Re: [PATCH 0/8] pack-bitmap-write: speed up bitmap generation
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
` (7 preceding siblings ...)
2026-05-19 16:12 ` [PATCH 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps Taylor Blau
@ 2026-05-27 10:27 ` Jeff King
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
9 siblings, 0 replies; 34+ messages in thread
From: Jeff King @ 2026-05-27 10:27 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Junio C Hamano, Elijah Newren, Derrick Stolee
On Tue, May 19, 2026 at 12:12:33PM -0400, Taylor Blau wrote:
> This series improves the performance of reachability bitmap generation,
> focusing on very large repositories and the penalty to generate
> pseudo-merge reachability bitmaps.
Very nice numbers. I'm especially excited about patches 1-6, which are
really just speeding up bitmap generation in general, pseudo-merges
aside. I think you could even have split those off into their own
series, and then built the final two as a separate topic, but I am happy
either way.
I looked through the patches and had nothing to complain about.
-Peff
^ permalink raw reply [flat|nested] 34+ messages in thread* [PATCH v2 0/8] pack-bitmap-write: speed up bitmap generation
2026-05-19 16:12 [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Taylor Blau
` (8 preceding siblings ...)
2026-05-27 10:27 ` [PATCH 0/8] pack-bitmap-write: speed up bitmap generation Jeff King
@ 2026-05-27 19:55 ` Taylor Blau
2026-05-27 19:55 ` [PATCH v2 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()` Taylor Blau
` (7 more replies)
9 siblings, 8 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
Here is a reroll of my series to improve the performance of reachability
bitmap generation, focusing on very large repositories and the penalty
to generate pseudo-merge reachability bitmaps.
The series is largely unchanged since last time. Notable changes in this
round include:
- minor refactoring in the pair of patches which consolidate the
`find_object_pos()` success path and introduce the object position
cache during bitmap fills, and
- dropping a stale paragraph from the final patch's message, which
described follow-up commits that are no longer part of this series.
As usual, a range-diff against v1 is included below for convenience.
Thanks in advance for your review!
Taylor Blau (8):
pack-bitmap: pass object position to `fill_bitmap_tree()`
pack-bitmap: check subtree bits before recursing
pack-bitmap: reuse stored selected bitmaps
pack-bitmap: consolidate `find_object_pos()` success path
pack-bitmap: cache object positions during fill
pack-bitmap: sort bitmaps before XORing
pack-bitmap: remember pseudo-merge parents
pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
pack-bitmap-write.c | 431 +++++++++++++++++++++++++++++++++++++-------
pack-bitmap.h | 7 +
2 files changed, 377 insertions(+), 61 deletions(-)
Range-diff against v1:
1: 13191c19b91 = 1: ad025810ab3 pack-bitmap: pass object position to `fill_bitmap_tree()`
2: 7d6d1cec0dd = 2: 59da63d0330 pack-bitmap: check subtree bits before recursing
3: 6e1f6bef5f6 = 3: f13d65c0ad9 pack-bitmap: reuse stored selected bitmaps
4: c9a56066094 ! 4: 856aa3a6ab7 pack-bitmap: consolidate `find_object_pos()` success path
@@ Commit message
Signed-off-by: Taylor Blau <me@ttaylorr.com>
## pack-bitmap-write.c ##
+@@ pack-bitmap-write.c: static uint32_t find_object_pos(struct bitmap_writer *writer,
+ const struct object_id *oid, int *found)
+ {
+ struct object_entry *entry;
++ uint32_t pos;
+
+ entry = packlist_find(writer->to_pack, oid);
+ if (entry) {
@@ pack-bitmap-write.c: static uint32_t find_object_pos(struct bitmap_writer *writer,
if (writer->midx)
base_objects = writer->midx->num_objects +
5: e43ef6a42d1 ! 5: 70dfa80d543 pack-bitmap: cache object positions during fill
@@ pack-bitmap-write.c: void bitmap_writer_push_commit(struct bitmap_writer *writer
const struct object_id *oid, int *found)
{
struct object_entry *entry;
-+ uint32_t pos;
-+
+ uint32_t pos;
+
+ bitmap_writer_init_pos_cache(writer);
+
+ if (find_cached_object_pos(writer, oid, &pos)) {
@@ pack-bitmap-write.c: void bitmap_writer_push_commit(struct bitmap_writer *writer
+ *found = 1;
+ return pos;
+ }
-
++
entry = packlist_find(writer->to_pack, oid);
if (entry) {
uint32_t base_objects = 0;
6: b0a4f31353a = 6: b1184792d23 pack-bitmap: sort bitmaps before XORing
7: 0bd88e6a096 = 7: 673b6262911 pack-bitmap: remember pseudo-merge parents
8: 30ce254312c ! 8: 8722242f1bb pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
@@ Commit message
portion of history reachable by one or more pseudo-merge(s), but not by
any non-pseudo-merge commit selected for bitmapping.
- Now that we have decoupled how we generate pseudo-merges from their
- representation, the following commits will improve the API around
- specifying pseudo-merge groupings during bitmap generation.
-
Signed-off-by: Taylor Blau <me@ttaylorr.com>
## pack-bitmap-write.c ##
base-commit: c3d7ca7d982efc3a848fd85f34e867cfc0a99479
--
2.54.0.rc1.84.g1cf18622df7
^ permalink raw reply [flat|nested] 34+ messages in thread* [PATCH v2 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()`
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
@ 2026-05-27 19:55 ` Taylor Blau
2026-05-27 19:55 ` [PATCH v2 2/8] pack-bitmap: check subtree bits before recursing Taylor Blau
` (6 subsequent siblings)
7 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In the following commit, callers of `fill_bitmap_tree()` will be
required to check the bit corresponding to their tree before calling
that function. That change will reduce the overhead of setting up and
tearing down stack frames for trees whose bits are already set.
To prepare for that change, have callers pass in the tree's bit position
in `fill_bitmap_tree()`, which will make the next commit easier to read.
In the meantime, this change has a surprising and measurable benefit
during bitmap generation, particularly on very large repositories.
When processing sub-trees within `fill_bitmap_tree()`, the preimage of
this patch did the following:
while (tree_entry(&desc, entry)) {
switch (object_type(entry.mode)) {
case OBJ_TREE:
if (fill_bitmap_tree(writer, bitmap,
lookup_tree(writer->repo,
&entry.oid)) < 0) {
/* ... */
}
/* ... */
}
}
, first performing the object lookup via `lookup_tree()`, and then
locating its bit position within the recursive call. This patch
effectively reorders those two calls so that we first discover the
sub-tree's bit position, *then* load its tree.
By reordering these two operations, we spend fewer CPU cycles per
instruction, likely due to improved CPU dependency/cache/pipeline
behavior. Comparing the results of: running `perf stat` before and after
this commit, we have:
+--------------+-------------+-------------+-------------------+
| | HEAD^ | HEAD | Delta |
+--------------+-------------+-------------+-------------------+
| elapsed | 612.5 s | 582.4 s | -30.1 s (-4.9%) |
| cycles | 2,857.3 B | 2,713.3 B | -144.0 B (-5.0%) |
| instructions | 2,413.2 B | 2,415.5 B | +2.3 B (+0.1%) |
| CPI | 1.184 | 1.123 | -0.061 (-5.1%) |
+--------------+-------------+-------------+-------------------+
In a large repository with ~4.8M commit, and ~37.1M tree objects this
change improves timing from ~612.5 seconds down to ~582.4 seconds, or a
~4.9% improvement. More importantly, the number of CPU cycles spent
dropped off significantly as a result of this commit, lowering our
cycles-per-instruction ratio by about ~5.1%.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 1c8070f99c0..2d5ff8fd406 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -456,10 +456,10 @@ static void bitmap_builder_clear(struct bitmap_builder *bb)
static int fill_bitmap_tree(struct bitmap_writer *writer,
struct bitmap *bitmap,
- struct tree *tree)
+ struct tree *tree,
+ uint32_t pos)
{
int found;
- uint32_t pos;
struct tree_desc desc;
struct name_entry entry;
@@ -467,9 +467,6 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
* If our bit is already set, then there is nothing to do. Both this
* tree and all of its children will be set.
*/
- pos = find_object_pos(writer, &tree->object.oid, &found);
- if (!found)
- return -1;
if (bitmap_get(bitmap, pos))
return 0;
bitmap_set(bitmap, pos);
@@ -482,8 +479,12 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
while (tree_entry(&desc, &entry)) {
switch (object_type(entry.mode)) {
case OBJ_TREE:
+ pos = find_object_pos(writer, &entry.oid, &found);
+ if (!found)
+ return -1;
if (fill_bitmap_tree(writer, bitmap,
- lookup_tree(writer->repo, &entry.oid)) < 0)
+ lookup_tree(writer->repo,
+ &entry.oid), pos) < 0)
return -1;
break;
case OBJ_BLOB:
@@ -575,8 +576,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
}
while (tree_queue->nr) {
- if (fill_bitmap_tree(writer, ent->bitmap,
- prio_queue_get(tree_queue)) < 0)
+ struct tree *t = prio_queue_get(tree_queue);
+ int found;
+
+ pos = find_object_pos(writer, &t->object.oid, &found);
+ if (!found)
+ return -1;
+
+ if (fill_bitmap_tree(writer, ent->bitmap, t, pos) < 0)
return -1;
}
return 0;
--
2.54.0.rc1.84.g1cf18622df7
^ permalink raw reply related [flat|nested] 34+ messages in thread* [PATCH v2 2/8] pack-bitmap: check subtree bits before recursing
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
2026-05-27 19:55 ` [PATCH v2 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()` Taylor Blau
@ 2026-05-27 19:55 ` Taylor Blau
2026-05-27 19:55 ` [PATCH v2 3/8] pack-bitmap: reuse stored selected bitmaps Taylor Blau
` (5 subsequent siblings)
7 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
In the previous commit, we adjusted the callers of `fill_bitmap_tree()`
to pass in the bit position of the tree they wish to fill.
This commit makes use of that information at the call site to avoid
setting up a stack frame for fill_bitmap_tree() entirely whenever a
tree's bit position is already set.
Since this is such a hot path, the avoided cost of setting up and
tearing down stack frames for each noop'd call to `fill_bitmap_tree()`
is significant:
+--------------+-------------+-------------+-------------------+
| | HEAD^ | HEAD | Delta |
+--------------+-------------+-------------+-------------------+
| elapsed | 582.4 s | 562.8 s | -19.6 s (-3.4%) |
| cycles | 2,713.3 B | 2,621.3 B | -92.0 B (-3.4%) |
| instructions | 2,415.5 B | 2,348.9 B | -66.6 B (-2.8%) |
| CPI | 1.123 | 1.116 | -0.007 (-0.7%) |
+--------------+-------------+-------------+-------------------+
In the same repository as in the previous commit, our timings dropped
from ~582.4 seconds down to ~562.77 seconds.
While the cycles-per-instruction ratio is basically unchanged, we
execute significantly fewer instructions, and correspondingly fewer
cycles.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 23 +++++++++++++++++------
1 file changed, 17 insertions(+), 6 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 2d5ff8fd406..72610397020 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -463,12 +463,6 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
struct tree_desc desc;
struct name_entry entry;
- /*
- * If our bit is already set, then there is nothing to do. Both this
- * tree and all of its children will be set.
- */
- if (bitmap_get(bitmap, pos))
- return 0;
bitmap_set(bitmap, pos);
if (repo_parse_tree(writer->repo, tree) < 0)
@@ -482,6 +476,15 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
pos = find_object_pos(writer, &entry.oid, &found);
if (!found)
return -1;
+ if (bitmap_get(bitmap, pos)) {
+ /*
+ * If our bit is already set, then there
+ * is nothing to do. Both this tree and
+ * all of its children will be set.
+ */
+ break;
+ }
+
if (fill_bitmap_tree(writer, bitmap,
lookup_tree(writer->repo,
&entry.oid), pos) < 0)
@@ -582,6 +585,14 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
pos = find_object_pos(writer, &t->object.oid, &found);
if (!found)
return -1;
+ if (bitmap_get(ent->bitmap, pos)) {
+ /*
+ * If our bit is already set, then there is
+ * nothing to do. Both this tree and all of its
+ * children will be set.
+ */
+ continue;
+ }
if (fill_bitmap_tree(writer, ent->bitmap, t, pos) < 0)
return -1;
--
2.54.0.rc1.84.g1cf18622df7
^ permalink raw reply related [flat|nested] 34+ messages in thread* [PATCH v2 3/8] pack-bitmap: reuse stored selected bitmaps
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
2026-05-27 19:55 ` [PATCH v2 1/8] pack-bitmap: pass object position to `fill_bitmap_tree()` Taylor Blau
2026-05-27 19:55 ` [PATCH v2 2/8] pack-bitmap: check subtree bits before recursing Taylor Blau
@ 2026-05-27 19:55 ` Taylor Blau
2026-05-27 19:55 ` [PATCH v2 4/8] pack-bitmap: consolidate `find_object_pos()` success path Taylor Blau
` (4 subsequent siblings)
7 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
When `fill_bitmap_commit()` reaches an ancestor that was selected for
its own bitmap and processed earlier, its object closure is already
stored in `writer->bitmaps` as an EWAH bitmap. As a result, walking
through that commit's tree and parents again is redundant.
Teach `fill_bitmap_commit()` to notice that case. For non-root commits in
the walk, look for a stored selected bitmap and OR it into the bitmap
being built. If one exists, skip the commit, its tree, and its parents.
Building bitmaps from scratch on the same test repository from the
previous commits yields a significant speed-up:
+------------------+-------------+-------------+---------------------+
| | HEAD^ | HEAD | Delta |
+------------------+-------------+-------------+---------------------+
| elapsed | 562.8 s | 324.8 s | -237.9 s (-42.3%) |
| cycles | 2,621.3 B | 1,508.6 B | -1,112.7 B (-42.4%) |
| instructions | 2,348.9 B | 1,436.6 B | -912.3 B (-38.8%) |
| CPI | 1.116 | 1.050 | -0.066 (-5.9%) |
+------------------+-------------+-------------+---------------------+
In our testing repository, there are 1,261 commits selected for bitmap
coverage, and 1,382 maximal commits induced as a result of that. Of the
1,382 calls made to `fill_bitmap_commit()` (one per maximal commit), 131
of them can be short-circuited at some point during their traversal as a
consequence of this change.
In large repositories where the cost of filling the bitmap for any
individual commit is large, being able to short-circuit even ~9.5% of
the calls to `fill_bitmap_commit()` results in a significant savings.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 72610397020..651ad467469 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -509,6 +509,9 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
static int reused_bitmaps_nr;
static int reused_pseudo_merge_bitmaps_nr;
+static int fill_bitmap_commit_calls_nr;
+static int fill_bitmap_commit_found_ancestor_nr;
+
static int fill_bitmap_commit(struct bitmap_writer *writer,
struct bb_commit *ent,
struct commit *commit,
@@ -519,6 +522,9 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
{
int found;
uint32_t pos;
+
+ fill_bitmap_commit_calls_nr++;
+
if (!ent->bitmap)
ent->bitmap = bitmap_new();
@@ -553,6 +559,28 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
bitmap_free(remapped);
}
+ /*
+ * If we encounter an ancestor for which we have already
+ * computed a bitmap during this build (i.e. a regular
+ * selected commit processed earlier in topo order), we can
+ * short-circuit the walk: its stored bitmap already covers
+ * the commit itself, its tree, and all of its ancestors.
+ */
+ if (c != commit) {
+ khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
+ c->object.oid);
+ if (hash_pos != kh_end(writer->bitmaps)) {
+ struct bitmapped_commit *stored =
+ kh_value(writer->bitmaps, hash_pos);
+ if (stored && stored->bitmap) {
+ fill_bitmap_commit_found_ancestor_nr++;
+ bitmap_or_ewah(ent->bitmap,
+ stored->bitmap);
+ continue;
+ }
+ }
+ }
+
/*
* Mark ourselves and queue our tree. The commit
* walk ensures we cover all parents.
@@ -692,6 +720,12 @@ int bitmap_writer_build(struct bitmap_writer *writer)
trace2_data_intmax("pack-bitmap-write", writer->repo,
"building_bitmaps_pseudo_merge_reused",
reused_pseudo_merge_bitmaps_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "fill_bitmap_commit_calls_nr",
+ fill_bitmap_commit_calls_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "fill_bitmap_commit_found_ancestor_nr",
+ fill_bitmap_commit_found_ancestor_nr);
stop_progress(&writer->progress);
--
2.54.0.rc1.84.g1cf18622df7
^ permalink raw reply related [flat|nested] 34+ messages in thread* [PATCH v2 4/8] pack-bitmap: consolidate `find_object_pos()` success path
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
` (2 preceding siblings ...)
2026-05-27 19:55 ` [PATCH v2 3/8] pack-bitmap: reuse stored selected bitmaps Taylor Blau
@ 2026-05-27 19:55 ` Taylor Blau
2026-05-27 19:56 ` [PATCH v2 5/8] pack-bitmap: cache object positions during fill Taylor Blau
` (3 subsequent siblings)
7 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:55 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
Both sides of `find_object_pos()` report success in the same way by
setting the optional `found` out-parameter and return the resolved
bitmap position.
Prepare for adding more bookkeeping around object-position lookups by
storing the result in a local `pos` variable and sharing the success
return path between the packlist and MIDX cases.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 651ad467469..42ed22feacc 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -217,6 +217,7 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
const struct object_id *oid, int *found)
{
struct object_entry *entry;
+ uint32_t pos;
entry = packlist_find(writer->to_pack, oid);
if (entry) {
@@ -224,23 +225,22 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
if (writer->midx)
base_objects = writer->midx->num_objects +
writer->midx->num_objects_in_base;
-
- if (found)
- *found = 1;
- return oe_in_pack_pos(writer->to_pack, entry) + base_objects;
+ pos = oe_in_pack_pos(writer->to_pack, entry) + base_objects;
} else if (writer->midx) {
- uint32_t at, pos;
+ uint32_t at;
if (!bsearch_midx(oid, writer->midx, &at))
goto missing;
if (midx_to_pack_pos(writer->midx, at, &pos) < 0)
goto missing;
-
- if (found)
- *found = 1;
- return pos;
+ } else {
+ goto missing;
}
+ if (found)
+ *found = 1;
+ return pos;
+
missing:
if (found)
*found = 0;
--
2.54.0.rc1.84.g1cf18622df7
^ permalink raw reply related [flat|nested] 34+ messages in thread* [PATCH v2 5/8] pack-bitmap: cache object positions during fill
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
` (3 preceding siblings ...)
2026-05-27 19:55 ` [PATCH v2 4/8] pack-bitmap: consolidate `find_object_pos()` success path Taylor Blau
@ 2026-05-27 19:56 ` Taylor Blau
2026-05-27 19:56 ` [PATCH v2 6/8] pack-bitmap: sort bitmaps before XORing Taylor Blau
` (2 subsequent siblings)
7 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:56 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
The previous commits removed some redundant work from bitmap generation
by avoiding unnecessary tree recursion and by reusing selected bitmaps
that have already been computed.
Even with those changes in place, there is still an extremely hot path
from `fill_bitmap_commit()` and `fill_bitmap_tree()` to translate object
IDs into their corresponding bit positions in order to generate their
bitmaps.
In a small repository, this overhead is not significant. However, in a
very large repository (e.g., the one that we have been using as a
benchmark over the past several commits with ~57M total objects), the
overhead of locating object bit positions (often repeatedly) adds up
significantly.
Combat this by adding a small, direct-mapped cache to the bitmap writer
which maps object IDs to their corresponding bit positions. Size the
cache according to the number of objects being written, with fixed lower
and upper bounds so small repositories do not pay for a large table and
large repositories can avoid most repeated packlist and MIDX lookups.
On my machine with (a somewhat outdated) GCC 15.2.0, each entry in the
cache is 40 bytes wide:
$ pahole -C bitmap_pos_cache_entry pack-bitmap-write.o
struct bitmap_pos_cache_entry {
struct object_id oid; /* 0 36 */
uint32_t pos; /* 36 4 */
/* size: 40, cachelines: 1, members: 2 */
/* last cacheline: 40 bytes */
};
, and we will allocate up to 2^21 entries for a maximum total of 80 MiB
of cache overhead.
In our example repository from above and in earlier commits, this
results in a ~9.4% reduction in runtime relative to the previous commit:
+------------------+-------------+-------------+---------------------+
| | HEAD^ | HEAD | Delta |
+------------------+-------------+-------------+---------------------+
| elapsed | 324.8 s | 294.1 s | -30.7 s (-9.4%) |
| cycles | 1,508.6 B | 1,365.5 B | -143.0 B (-9.5%) |
| instructions | 1,436.6 B | 1,389.8 B | -46.9 B (-3.3%) |
| CPI | 1.050 | 0.983 | -0.068 (-6.4%) |
+------------------+-------------+-------------+---------------------+
When generating bitmaps on this repository (to produce the above
timings), the cache grew to its maximum size of 80 MiB, and resulted in
1.024B cache hits and 59.957M cache misses.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 88 ++++++++++++++++++++++++++++++++++++++++++++-
pack-bitmap.h | 7 ++++
2 files changed, 94 insertions(+), 1 deletion(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 42ed22feacc..4b6fb07edd7 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -89,6 +89,7 @@ void bitmap_writer_free(struct bitmap_writer *writer)
ewah_free(writer->tags);
kh_destroy_oid_map(writer->bitmaps);
+ free(writer->pos_cache);
kh_foreach_value(writer->pseudo_merge_commits, idx,
free_pseudo_merge_commit_idx(idx));
@@ -213,15 +214,92 @@ void bitmap_writer_push_commit(struct bitmap_writer *writer,
writer->selected_nr++;
}
+struct bitmap_pos_cache_entry {
+ struct object_id oid;
+ uint32_t pos;
+};
+
+#define BITMAP_POS_MIN_CACHE_SIZE (1U << 10)
+#define BITMAP_POS_MAX_CACHE_SIZE (1U << 21)
+#define BITMAP_POS_CACHE_VALID (1U << 31)
+
+static void bitmap_writer_init_pos_cache(struct bitmap_writer *writer)
+{
+ if (writer->pos_cache)
+ return;
+
+ writer->pos_cache_nr = BITMAP_POS_MIN_CACHE_SIZE;
+
+ while (writer->pos_cache_nr < writer->to_pack->nr_objects &&
+ writer->pos_cache_nr < BITMAP_POS_MAX_CACHE_SIZE)
+ writer->pos_cache_nr <<= 1;
+
+ CALLOC_ARRAY(writer->pos_cache, writer->pos_cache_nr);
+}
+
+static size_t bitmap_writer_pos_cache_slot(struct bitmap_writer *writer,
+ const struct object_id *oid)
+{
+ return oidhash(oid) & (writer->pos_cache_nr - 1);
+}
+
+static bool bitmap_writer_pos_cache_valid(struct bitmap_writer *writer,
+ size_t slot)
+{
+ return !!(writer->pos_cache[slot].pos & BITMAP_POS_CACHE_VALID);
+}
+
+static int find_cached_object_pos(struct bitmap_writer *writer,
+ const struct object_id *oid, uint32_t *pos)
+{
+ size_t slot = bitmap_writer_pos_cache_slot(writer, oid);
+
+ if (bitmap_writer_pos_cache_valid(writer, slot) &&
+ oideq(&writer->pos_cache[slot].oid, oid)) {
+ writer->pos_cache_hits++;
+ *pos = writer->pos_cache[slot].pos & ~BITMAP_POS_CACHE_VALID;
+ return 1;
+ }
+
+ writer->pos_cache_misses++;
+ return 0;
+}
+
+static uint32_t store_cached_object_pos(struct bitmap_writer *writer,
+ const struct object_id *oid,
+ uint32_t pos)
+{
+ size_t slot;
+
+ if (pos & BITMAP_POS_CACHE_VALID)
+ return pos; /* too large to cache */
+
+ slot = bitmap_writer_pos_cache_slot(writer, oid);
+
+ oidcpy(&writer->pos_cache[slot].oid, oid);
+ writer->pos_cache[slot].pos = pos | BITMAP_POS_CACHE_VALID;
+
+ return pos;
+}
+
static uint32_t find_object_pos(struct bitmap_writer *writer,
const struct object_id *oid, int *found)
{
struct object_entry *entry;
uint32_t pos;
+ bitmap_writer_init_pos_cache(writer);
+
+ if (find_cached_object_pos(writer, oid, &pos)) {
+ if (found)
+ *found = 1;
+ return pos;
+ }
+
entry = packlist_find(writer->to_pack, oid);
if (entry) {
uint32_t base_objects = 0;
+
if (writer->midx)
base_objects = writer->midx->num_objects +
writer->midx->num_objects_in_base;
@@ -239,7 +317,7 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
if (found)
*found = 1;
- return pos;
+ return store_cached_object_pos(writer, oid, pos);
missing:
if (found)
@@ -662,6 +740,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
writer->progress = start_progress(writer->repo,
"Building bitmaps",
writer->selected_nr);
+
+ writer->pos_cache_hits = 0;
+ writer->pos_cache_misses = 0;
+
trace2_region_enter("pack-bitmap-write", "building_bitmaps_total",
writer->repo);
@@ -726,6 +808,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
trace2_data_intmax("pack-bitmap-write", writer->repo,
"fill_bitmap_commit_found_ancestor_nr",
fill_bitmap_commit_found_ancestor_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "bitmap_pos_cache_hits", writer->pos_cache_hits);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "bitmap_pos_cache_misses", writer->pos_cache_misses);
stop_progress(&writer->progress);
diff --git a/pack-bitmap.h b/pack-bitmap.h
index a95e1c2d115..19a86554579 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -132,6 +132,8 @@ int bitmap_has_oid_in_uninteresting(struct bitmap_index *, const struct object_i
off_t get_disk_usage_from_bitmap(struct bitmap_index *, struct rev_info *);
+struct bitmap_pos_cache_entry;
+
struct bitmap_writer {
struct repository *repo;
struct ewah_bitmap *commits;
@@ -143,6 +145,11 @@ struct bitmap_writer {
struct packing_data *to_pack;
struct multi_pack_index *midx; /* if appending to a MIDX chain */
+ struct bitmap_pos_cache_entry *pos_cache;
+ size_t pos_cache_nr;
+ uint64_t pos_cache_hits;
+ uint64_t pos_cache_misses;
+
struct bitmapped_commit *selected;
unsigned int selected_nr, selected_alloc;
--
2.54.0.rc1.84.g1cf18622df7
^ permalink raw reply related [flat|nested] 34+ messages in thread* [PATCH v2 6/8] pack-bitmap: sort bitmaps before XORing
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
` (4 preceding siblings ...)
2026-05-27 19:56 ` [PATCH v2 5/8] pack-bitmap: cache object positions during fill Taylor Blau
@ 2026-05-27 19:56 ` Taylor Blau
2026-05-27 19:56 ` [PATCH v2 7/8] pack-bitmap: remember pseudo-merge parents Taylor Blau
2026-05-27 19:56 ` [PATCH v2 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps Taylor Blau
7 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:56 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
Reachability bitmaps may be stored as XORs against nearby bitmaps, up to
10 away. However, when callers provide selected commits in an arbitrary
order, the writer may miss good ancestor/descendant pairs and produce
much larger bitmap files without changing query coverage.
Sort the selected bitmaps in date order (from oldest to newest) before
computing XOR offsets, leaving pseudo-merge bitmaps alone (which we will
deal with separately in following commits).
On our same testing repository from previous commits, this change shrunk
our selection of 1,261 bitmaps from ~635.46 MiB to 176.4 MiB for a
~72.24% reduction in the on-disk size of our *.bitmap file. The time to
generate the smaller bitmap file decreased by ~3.69 seconds, though this
is likely mostly noise.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 4b6fb07edd7..66282ea14b5 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -327,11 +327,40 @@ static uint32_t find_object_pos(struct bitmap_writer *writer,
return 0;
}
+static int bitmapped_commit_date_cmp(const void *_a, const void *_b)
+{
+ const struct bitmapped_commit *a = _a;
+ const struct bitmapped_commit *b = _b;
+
+ if (a->commit->date < b->commit->date)
+ return -1;
+ if (a->commit->date > b->commit->date)
+ return 1;
+ return 0;
+}
+
static void compute_xor_offsets(struct bitmap_writer *writer)
{
static const int MAX_XOR_OFFSET_SEARCH = 10;
int i, next = 0;
+ int nr = bitmap_writer_nr_selected_commits(writer);
+
+ if (nr > 1) {
+ QSORT(writer->selected, nr, bitmapped_commit_date_cmp);
+
+ for (i = 0; i < nr; i++) {
+ struct bitmapped_commit *stored = &writer->selected[i];
+ khiter_t hash_pos = kh_get_oid_map(writer->bitmaps,
+ stored->commit->object.oid);
+
+ if (hash_pos == kh_end(writer->bitmaps))
+ BUG("selected commit missing from bitmap map: %s",
+ oid_to_hex(&stored->commit->object.oid));
+
+ kh_value(writer->bitmaps, hash_pos) = stored;
+ }
+ }
while (next < writer->selected_nr) {
struct bitmapped_commit *stored = &writer->selected[next];
--
2.54.0.rc1.84.g1cf18622df7
^ permalink raw reply related [flat|nested] 34+ messages in thread* [PATCH v2 7/8] pack-bitmap: remember pseudo-merge parents
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
` (5 preceding siblings ...)
2026-05-27 19:56 ` [PATCH v2 6/8] pack-bitmap: sort bitmaps before XORing Taylor Blau
@ 2026-05-27 19:56 ` Taylor Blau
2026-05-27 19:56 ` [PATCH v2 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps Taylor Blau
7 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:56 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
write_pseudo_merges() currently builds an array of temporary bitmaps for
the parent set of each pseudo-merge, then serializes those bitmaps later
while writing the extension.
Move those parent bitmaps onto the corresponding bitmapped_commit
entries instead. This keeps the on-disk output unchanged, but gives the
parent bitmap the same lifetime and access pattern that later changes
will use when pseudo-merge object bitmaps are built before the write
step.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 66282ea14b5..8200aed6101 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -32,6 +32,7 @@ struct bitmapped_commit {
struct commit *commit;
struct ewah_bitmap *bitmap;
struct ewah_bitmap *write_as;
+ struct ewah_bitmap *pseudo_merge_parents;
int flags;
int xor_offset;
uint32_t commit_pos;
@@ -102,6 +103,7 @@ void bitmap_writer_free(struct bitmap_writer *writer)
if (bc->write_as != bc->bitmap)
ewah_free(bc->write_as);
ewah_free(bc->bitmap);
+ ewah_free(bc->pseudo_merge_parents);
}
free(writer->selected);
}
@@ -210,6 +212,7 @@ void bitmap_writer_push_commit(struct bitmap_writer *writer,
writer->selected[writer->selected_nr].write_as = NULL;
writer->selected[writer->selected_nr].flags = 0;
writer->selected[writer->selected_nr].pseudo_merge = pseudo_merge;
+ writer->selected[writer->selected_nr].pseudo_merge_parents = NULL;
writer->selected_nr++;
}
@@ -1004,42 +1007,47 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
struct hashfile *f)
{
struct oid_array commits = OID_ARRAY_INIT;
- struct bitmap **commits_bitmap = NULL;
off_t *pseudo_merge_ofs = NULL;
off_t start, table_start, next_ext;
uint32_t base = bitmap_writer_nr_selected_commits(writer);
size_t i, j = 0;
- CALLOC_ARRAY(commits_bitmap, writer->pseudo_merges_nr);
CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr);
for (i = 0; i < writer->pseudo_merges_nr; i++) {
struct bitmapped_commit *merge = &writer->selected[base + i];
struct commit_list *p;
+ struct bitmap *parents = bitmap_new();
if (!merge->pseudo_merge)
BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
- commits_bitmap[i] = bitmap_new();
-
for (p = merge->commit->parents; p; p = p->next)
- bitmap_set(commits_bitmap[i],
+ bitmap_set(parents,
find_object_pos(writer, &p->item->object.oid,
NULL));
+
+ merge->pseudo_merge_parents = bitmap_to_ewah(parents);
+ bitmap_free(parents);
}
start = hashfile_total(f);
for (i = 0; i < writer->pseudo_merges_nr; i++) {
- struct ewah_bitmap *commits_ewah = bitmap_to_ewah(commits_bitmap[i]);
+ struct bitmapped_commit *merge = &writer->selected[base + i];
+
+ if (!merge->pseudo_merge)
+ BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
+
+ if (!merge->pseudo_merge_parents)
+ BUG("missing pseudo-merge parents bitmap for commit %s",
+ oid_to_hex(&merge->commit->object.oid));
pseudo_merge_ofs[i] = hashfile_total(f);
- dump_bitmap(f, commits_ewah);
+ dump_bitmap(f, merge->pseudo_merge_parents);
dump_bitmap(f, writer->selected[base+i].write_as);
-
- ewah_free(commits_ewah);
}
next_ext = st_add(hashfile_total(f),
@@ -1122,12 +1130,8 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
hashwrite_be64(f, table_start - start);
hashwrite_be64(f, hashfile_total(f) - start + sizeof(uint64_t));
- for (i = 0; i < writer->pseudo_merges_nr; i++)
- bitmap_free(commits_bitmap[i]);
-
oid_array_clear(&commits);
free(pseudo_merge_ofs);
- free(commits_bitmap);
}
static int table_cmp(const void *_va, const void *_vb, void *_data)
--
2.54.0.rc1.84.g1cf18622df7
^ permalink raw reply related [flat|nested] 34+ messages in thread* [PATCH v2 8/8] pack-bitmap: build pseudo-merge bitmaps after regular bitmaps
2026-05-27 19:55 ` [PATCH v2 " Taylor Blau
` (6 preceding siblings ...)
2026-05-27 19:56 ` [PATCH v2 7/8] pack-bitmap: remember pseudo-merge parents Taylor Blau
@ 2026-05-27 19:56 ` Taylor Blau
7 siblings, 0 replies; 34+ messages in thread
From: Taylor Blau @ 2026-05-27 19:56 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Jeff King, Elijah Newren, Derrick Stolee
When generating bitmaps, `bitmap_builder_init()` starts with an initial
selection of commits to receive bitmap coverage, and then determines a
set of "maximal" commits based on its input.
Commit 089f751360f (pack-bitmap-write: build fewer intermediate bitmaps,
2020-12-08) has extensive details, but the gist is as follows:
Each selected commit starts with one commit_mask bit in its "commit
mask" bitmap. Then, we walk the first-parent history in topological
order and OR each commit's mask into its (first) parent. Whenever that
OR results in the parent having more bits set, the child is deemed to be
non-maximal, and the frontier is pushed further back along the first
parent history.
That approach works extremely well for ordinary selected commits, whose
first-parent histories often describe real sharing between the bitmaps
we are going to write.
It struggles, however, to efficiently generate pseudo-merge bitmaps.
Unlike ordinary commits for which the above algorithm is designed,
pseudo-merges don't represent any "real" commit in history, just a
grouping of non-bitmapped reference tips. In that sense, their first
parent is just a part of a larger set, and treating them like ordinary
selected commits imposes a significant slow-down when generating bitmaps
with pseudo-merges enabled.
Consider partitioning all non-bitmapped reference tips into eight
individual pseudo-merges via the following configuration:
[bitmapPseudoMerge "all"]
pattern=refs/
threshold=now
stableSize=10000000
maxMerges=8
, the cost of generating a bitmap from scratch rises significantly:
+------------------+-----------------+---------------+---------------------+
| | no pseudo-merge | pseudo-merges | Delta |
| | | (HEAD^) | |
+------------------+-----------------+---------------+---------------------+
| elapsed | 294.1 s | 575.0 s | +280.9 s (+95.5%) |
| cycles | 1,365.5 B | 2,686.9 B | +1,321.4 B (+96.8%) |
| instructions | 1,389.8 B | 2,546.6 B | +1,156.8 B (+83.2%) |
| CPI | 0.983 | 1.055 | +0.073 (+7.4%) |
+------------------+-----------------+---------------+---------------------+
This is a particularly poor trade-off, because the time saved by these
pseudo-merges during, e.g.,
$ git rev-list --count --all --objects --use-bitmap-index
is only:
$ hyperfine -L v true,false -n 'pseudo-merges: {v}' '
GIT_TEST_USE_PSEUDO_MERGES={v} git.compile rev-list --count \
--objects --all --use-bitmap-index
'
Benchmark 1: pseudo-merges: true
Time (mean ± σ): 2.613 s ± 0.012 s [User: 2.308 s, System: 0.305 s]
Range (min … max): 2.594 s … 2.633 s 10 runs
Benchmark 2: pseudo-merges: false
Time (mean ± σ): 52.205 s ± 0.170 s [User: 51.500 s, System: 0.697 s]
Range (min … max): 51.956 s … 52.458 s 10 runs
Summary
pseudo-merges: true ran
19.98 ± 0.11 times faster than pseudo-merges: false
In other words, we pay a nearly ~5 minute penalty to generate
pseudo-merge bitmaps, but only save ~50 seconds during traversal.
The problem stems from injecting pseudo-merges into the bitmap builder
as if they were normal commits. The maximal commit selection algorithm
was simply not designed for that case, and performs predictably poorly.
The only reason we reused the maximal commit selection routine for
pseudo-merges alongside regular non-pseudo-merge commits is because we
represent them both as commit objects (where the pseudo-merge commits
just represent a made-up commit as opposed to one that actually exists
in a repository's object store).
Instead, build the regular selected commit bitmaps first, considering
only non-pseudo-merge commits in `bitmap_builder_init()`. Once those
bitmaps have been stored, build each pseudo-merge bitmap separately and
attach its parent and object bitmaps to the corresponding pseudo-merge
entry before writing the extension.
This keeps the regular bitmap build shaped like the no-pseudo-merge
case. The later pseudo-merge fill can still stop at stored selected
ancestor bitmaps, so it does not have to rewalk each pseudo-merge
closure from scratch.
When an existing bitmap has the same pseudo-merge parent set, reuse and
remap that whole pseudo-merge bitmap before falling back to
fill_bitmap_commit(). This preserves the benefit of stable pseudo-merges
while keeping the on-disk format and reader behavior unchanged.
As a result, the overhead cost for generating pseudo-merges in the above
configuration is much smaller:
+------------------+-----------------+---------------+-------------------+
| | no pseudo-merge | pseudo-merges | Delta |
| | | (HEAD) | |
+------------------+-----------------+---------------+-------------------+
| elapsed | 294.1 s | 328.4 s | +34.3 s (+11.7%) |
| cycles | 1,365.5 B | 1,529.3 B | +163.7 B (+12.0%) |
| instructions | 1,389.8 B | 1,552.8 B | +163.0 B (+11.7%) |
| CPI | 0.983 | 0.985 | +0.002 (+0.2%) |
+------------------+-----------------+---------------+-------------------+
Recall that at the start of this series, generating reachability bitmaps
took 612.5 seconds *without* pseudo-merges. With this commit, it is
still ~46.38% *faster* to generate reachability bitmaps *with*
pseudo-merges than it was to generate bitmaps wihtout them at the
beginning of this series.
The changes to implement this are mostly straightforward. We exclude
pseudo-merge commits from the existing bitmap generation, and walk over
them in a separate pass, by either reusing an existing on-disk
pseudo-merge, or passing the pseudo-merge commit itself back to the
existing routine in `fill_bitmap_commit()`.
(Note that the routine to build pseudo-merge bitmaps is the same both
before and after this change, the difference is only that we do not let
psuedo-merges participate in determining the set of maximal commits.)
The only wrinkle is that `fill_bitmap_commit()` must be taught to not
expect that all tree objects have been parsed, which is the case for any
portion of history reachable by one or more pseudo-merge(s), but not by
any non-pseudo-merge commit selected for bitmapping.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap-write.c | 210 ++++++++++++++++++++++++++++++++++++--------
1 file changed, 174 insertions(+), 36 deletions(-)
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index 8200aed6101..1bcb3f98a42 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -446,13 +446,17 @@ static void bitmap_builder_init(struct bitmap_builder *bb,
revs.topo_order = 1;
revs.first_parent_only = 1;
- for (i = 0; i < writer->selected_nr; i++) {
+ for (i = 0; i < bitmap_writer_nr_selected_commits(writer); i++) {
struct bitmapped_commit *bc = &writer->selected[i];
struct bb_commit *ent = bb_data_at(&bb->data, bc->commit);
+ if (bc->pseudo_merge)
+ BUG("unexpected pseudo-merge at %"PRIuMAX,
+ (uintmax_t)i);
+
ent->selected = 1;
ent->maximal = 1;
- ent->pseudo_merge = bc->pseudo_merge;
+ ent->pseudo_merge = 0;
ent->idx = i;
ent->commit_mask = bitmap_new();
@@ -618,6 +622,8 @@ static int fill_bitmap_tree(struct bitmap_writer *writer,
static int reused_bitmaps_nr;
static int reused_pseudo_merge_bitmaps_nr;
+static int pseudo_merge_bitmap_nr;
+static int pseudo_merge_bitmap_parents;
static int fill_bitmap_commit_calls_nr;
static int fill_bitmap_commit_found_ancestor_nr;
@@ -631,8 +637,12 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
const uint32_t *mapping)
{
int found;
+ int from_pseudo_merge = commit->object.flags & BITMAP_PSEUDO_MERGE;
uint32_t pos;
+ if (ent->pseudo_merge)
+ BUG("unexpected pseudo-merge commit in fill_bitmap_commit()");
+
fill_bitmap_commit_calls_nr++;
if (!ent->bitmap)
@@ -648,10 +658,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
struct ewah_bitmap *old;
struct bitmap *remapped = bitmap_new();
- if (commit->object.flags & BITMAP_PSEUDO_MERGE)
- old = pseudo_merge_bitmap_for_commit(old_bitmap, c);
- else
- old = bitmap_for_commit(old_bitmap, c);
+ old = bitmap_for_commit(old_bitmap, c);
/*
* If this commit has an old bitmap, then translate that
* bitmap and add its bits to this one. No need to walk
@@ -660,10 +667,7 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
if (old && !rebuild_bitmap(mapping, old, remapped)) {
bitmap_or(ent->bitmap, remapped);
bitmap_free(remapped);
- if (commit->object.flags & BITMAP_PSEUDO_MERGE)
- reused_pseudo_merge_bitmaps_nr++;
- else
- reused_bitmaps_nr++;
+ reused_bitmaps_nr++;
continue;
}
bitmap_free(remapped);
@@ -696,12 +700,32 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
* walk ensures we cover all parents.
*/
if (!(c->object.flags & BITMAP_PSEUDO_MERGE)) {
+ struct tree *tree;
+
+ if (from_pseudo_merge && !c->object.parsed) {
+ /*
+ * Commits reachable from selected
+ * non-pseudo-merges are already parsed
+ * by the regular bitmap build.
+ *
+ * However, pseudo-merge fills can also
+ * reach commits that were not covered
+ * there, so parse any such leftovers
+ * before reading their tree or parents.
+ */
+ if (repo_parse_commit(writer->repo, c))
+ return -1;
+ }
+
pos = find_object_pos(writer, &c->object.oid, &found);
if (!found)
return -1;
bitmap_set(ent->bitmap, pos);
- prio_queue_put(tree_queue,
- repo_get_commit_tree(writer->repo, c));
+
+ tree = repo_get_commit_tree(writer->repo, c);
+ if (!tree)
+ return -1;
+ prio_queue_put(tree_queue, tree);
}
for (p = c->parents; p; p = p->next) {
@@ -738,6 +762,137 @@ static int fill_bitmap_commit(struct bitmap_writer *writer,
return 0;
}
+static int reuse_pseudo_merge_bitmap(struct bitmap_index *old_bitmap,
+ const uint32_t *mapping,
+ struct commit *merge,
+ struct ewah_bitmap **out)
+{
+ struct ewah_bitmap *old;
+ struct bitmap *remapped;
+
+ if (!old_bitmap || !mapping)
+ return 0;
+
+ old = pseudo_merge_bitmap_for_commit(old_bitmap, merge);
+ if (!old)
+ return 0;
+
+ remapped = bitmap_new();
+ if (rebuild_bitmap(mapping, old, remapped) < 0) {
+ bitmap_free(remapped);
+ return 0;
+ }
+
+ *out = bitmap_to_ewah(remapped);
+ bitmap_free(remapped);
+ reused_pseudo_merge_bitmaps_nr++;
+ return 1;
+}
+
+static int build_pseudo_merge_bitmap(struct bitmap_writer *writer,
+ struct bitmap_index *old_bitmap,
+ const uint32_t *mapping,
+ struct commit *merge,
+ struct ewah_bitmap **out)
+{
+ struct bb_commit ent = { 0 };
+ struct prio_queue queue = { NULL };
+ struct prio_queue tree_queue = { NULL };
+ unsigned parents = commit_list_count(merge->parents);
+ int ret;
+
+ ent.bitmap = bitmap_new();
+
+ pseudo_merge_bitmap_nr++;
+ pseudo_merge_bitmap_parents += parents;
+
+ if (reuse_pseudo_merge_bitmap(old_bitmap, mapping, merge, out)) {
+ ret = 0;
+ goto done;
+ }
+
+ ret = fill_bitmap_commit(writer, &ent, merge, &queue, &tree_queue,
+ old_bitmap, mapping);
+
+ if (!ret)
+ *out = bitmap_to_ewah(ent.bitmap);
+
+done:
+ bitmap_free(ent.bitmap);
+ clear_prio_queue(&queue);
+ clear_prio_queue(&tree_queue);
+
+ return ret;
+}
+
+static int build_pseudo_merge_bitmaps(struct bitmap_writer *writer,
+ struct bitmap_index *old_bitmap,
+ const uint32_t *mapping,
+ int *nr_stored)
+{
+ size_t i = bitmap_writer_nr_selected_commits(writer);
+ int ret = 0;
+
+ if (!writer->pseudo_merges_nr)
+ return 0;
+
+ trace2_region_enter("pack-bitmap-write", "building_pseudo_merge_bitmaps",
+ writer->repo);
+
+ for (; i < writer->selected_nr; i++) {
+ struct bitmapped_commit *merge = &writer->selected[i];
+ struct commit_list *p;
+ struct bitmap *parents = bitmap_new();
+ struct ewah_bitmap *objects = NULL;
+
+ if (!merge->pseudo_merge)
+ BUG("found non-pseudo merge commit at %"PRIuMAX,
+ (uintmax_t)i);
+
+ for (p = merge->commit->parents; p; p = p->next) {
+ int found;
+ uint32_t pos = find_object_pos(writer,
+ &p->item->object.oid,
+ &found);
+ if (!found) {
+ bitmap_free(parents);
+ ret = -1;
+ goto done;
+ }
+ bitmap_set(parents, pos);
+ }
+
+ merge->pseudo_merge_parents = bitmap_to_ewah(parents);
+ bitmap_free(parents);
+
+ if (build_pseudo_merge_bitmap(writer, old_bitmap, mapping,
+ merge->commit, &objects) < 0) {
+ ret = -1;
+ goto done;
+ }
+ merge->bitmap = objects;
+
+ (*nr_stored)++;
+ display_progress(writer->progress, *nr_stored);
+ }
+
+done:
+ trace2_region_leave("pack-bitmap-write", "building_pseudo_merge_bitmaps",
+ writer->repo);
+
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "pseudo_merge_bitmap_nr",
+ pseudo_merge_bitmap_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "building_bitmaps_pseudo_merge_reused",
+ reused_pseudo_merge_bitmaps_nr);
+ trace2_data_intmax("pack-bitmap-write", writer->repo,
+ "pseudo_merge_bitmap_parents",
+ pseudo_merge_bitmap_parents);
+
+ return ret;
+}
+
static void store_selected(struct bitmap_writer *writer,
struct bb_commit *ent, struct commit *commit)
{
@@ -821,6 +976,10 @@ int bitmap_writer_build(struct bitmap_writer *writer)
bitmap_free(ent->bitmap);
ent->bitmap = NULL;
}
+ if (closed &&
+ build_pseudo_merge_bitmaps(writer, old_bitmap, mapping,
+ &nr_stored) < 0)
+ closed = 0;
clear_prio_queue(&queue);
clear_prio_queue(&tree_queue);
bitmap_builder_clear(&bb);
@@ -831,9 +990,6 @@ int bitmap_writer_build(struct bitmap_writer *writer)
writer->repo);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"building_bitmaps_reused", reused_bitmaps_nr);
- trace2_data_intmax("pack-bitmap-write", writer->repo,
- "building_bitmaps_pseudo_merge_reused",
- reused_pseudo_merge_bitmaps_nr);
trace2_data_intmax("pack-bitmap-write", writer->repo,
"fill_bitmap_commit_calls_nr",
fill_bitmap_commit_calls_nr);
@@ -1015,23 +1171,6 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
CALLOC_ARRAY(pseudo_merge_ofs, writer->pseudo_merges_nr);
- for (i = 0; i < writer->pseudo_merges_nr; i++) {
- struct bitmapped_commit *merge = &writer->selected[base + i];
- struct commit_list *p;
- struct bitmap *parents = bitmap_new();
-
- if (!merge->pseudo_merge)
- BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
-
- for (p = merge->commit->parents; p; p = p->next)
- bitmap_set(parents,
- find_object_pos(writer, &p->item->object.oid,
- NULL));
-
- merge->pseudo_merge_parents = bitmap_to_ewah(parents);
- bitmap_free(parents);
- }
-
start = hashfile_total(f);
for (i = 0; i < writer->pseudo_merges_nr; i++) {
@@ -1040,14 +1179,13 @@ static void write_pseudo_merges(struct bitmap_writer *writer,
if (!merge->pseudo_merge)
BUG("found non-pseudo merge commit at %"PRIuMAX, (uintmax_t)i);
- if (!merge->pseudo_merge_parents)
- BUG("missing pseudo-merge parents bitmap for commit %s",
+ if (!merge->pseudo_merge_parents || !merge->bitmap)
+ BUG("missing pseudo-merge bitmap for commit %s",
oid_to_hex(&merge->commit->object.oid));
pseudo_merge_ofs[i] = hashfile_total(f);
-
dump_bitmap(f, merge->pseudo_merge_parents);
- dump_bitmap(f, writer->selected[base+i].write_as);
+ dump_bitmap(f, merge->bitmap);
}
next_ext = st_add(hashfile_total(f),
--
2.54.0.rc1.84.g1cf18622df7
^ permalink raw reply related [flat|nested] 34+ messages in thread