* [PATCH 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions
2024-01-16 19:03 [PATCH 0/2] pack-objects: enable multi-pack reuse via feature.experimental Taylor Blau
@ 2024-01-16 19:03 ` Taylor Blau
2024-01-17 7:32 ` Patrick Steinhardt
2024-01-16 19:03 ` [PATCH 2/2] pack-objects: enable multi-pack reuse via `feature.experimental` Taylor Blau
2024-02-05 22:50 ` [PATCH v2 0/2] pack-objects: enable multi-pack reuse via feature.experimental Taylor Blau
2 siblings, 1 reply; 11+ messages in thread
From: Taylor Blau @ 2024-01-16 19:03 UTC (permalink / raw)
To: git; +Cc: Jeff King, Junio C Hamano
Most of the tests in t5332 perform some setup before repeating a common
refrain that looks like:
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --revs --all >/dev/null &&
test_pack_reused $objects_nr <trace2.txt &&
test_packs_reused $packs_nr <trace2.txt
The next commit will add more tests which repeat the above refrain.
Avoid duplicating this invocation even further and prepare for the
following commit by wrapping the above in a helper function called
`test_pack_objects_reused_all()`.
Introduce another similar function `test_pack_objects_reused`, which
expects to read a list of revisions over stdin for tests which need more
fine-grained control of the contents of the pack they generate.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
t/t5332-multi-pack-reuse.sh | 70 +++++++++++++++----------------------
1 file changed, 28 insertions(+), 42 deletions(-)
diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh
index 2ba788b042..b53e821bc0 100755
--- a/t/t5332-multi-pack-reuse.sh
+++ b/t/t5332-multi-pack-reuse.sh
@@ -23,6 +23,26 @@ pack_position () {
grep "$1" objects | cut -d" " -f1
}
+# test_pack_objects_reused_all <pack-reused> <packs-reused>
+test_pack_objects_reused_all () {
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs --all >/dev/null &&
+
+ test_pack_reused "$1" <trace2.txt &&
+ test_packs_reused "$2" <trace2.txt
+}
+
+# test_pack_objects_reused <pack-reused> <packs-reused>
+test_pack_objects_reused () {
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs >/dev/null &&
+
+ test_pack_reused "$1" <trace2.txt &&
+ test_packs_reused "$2" <trace2.txt
+}
+
test_expect_success 'preferred pack is reused for single-pack reuse' '
test_config pack.allowPackReuse single &&
@@ -34,14 +54,10 @@ test_expect_success 'preferred pack is reused for single-pack reuse' '
git multi-pack-index write --bitmap &&
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --revs --all >/dev/null &&
-
- test_pack_reused 3 <trace2.txt &&
- test_packs_reused 1 <trace2.txt
+ test_pack_objects_reused_all 3 1
'
+
test_expect_success 'enable multi-pack reuse' '
git config pack.allowPackReuse multi
'
@@ -57,21 +73,11 @@ test_expect_success 'reuse all objects from subset of bitmapped packs' '
^$(git rev-parse A)
EOF
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --revs <in >/dev/null &&
-
- test_pack_reused 6 <trace2.txt &&
- test_packs_reused 2 <trace2.txt
+ test_pack_objects_reused 6 2 <in
'
test_expect_success 'reuse all objects from all packs' '
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --revs --all >/dev/null &&
-
- test_pack_reused 9 <trace2.txt &&
- test_packs_reused 3 <trace2.txt
+ test_pack_objects_reused_all 9 3
'
test_expect_success 'reuse objects from first pack with middle gap' '
@@ -104,12 +110,7 @@ test_expect_success 'reuse objects from first pack with middle gap' '
^$(git rev-parse D)
EOF
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
-
- test_pack_reused 3 <trace2.txt &&
- test_packs_reused 1 <trace2.txt
+ test_pack_objects_reused 3 1 <in
'
test_expect_success 'reuse objects from middle pack with middle gap' '
@@ -125,12 +126,7 @@ test_expect_success 'reuse objects from middle pack with middle gap' '
^$(git rev-parse D)
EOF
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
-
- test_pack_reused 3 <trace2.txt &&
- test_packs_reused 1 <trace2.txt
+ test_pack_objects_reused 3 1 <in
'
test_expect_success 'omit delta with uninteresting base (same pack)' '
@@ -160,10 +156,6 @@ test_expect_success 'omit delta with uninteresting base (same pack)' '
^$base
EOF
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
-
# We can only reuse the 3 objects corresponding to "other" from
# the latest pack.
#
@@ -175,8 +167,7 @@ test_expect_success 'omit delta with uninteresting base (same pack)' '
# The remaining objects from the other pack are similarly not
# reused because their objects are on the uninteresting side of
# the query.
- test_pack_reused 3 <trace2.txt &&
- test_packs_reused 1 <trace2.txt
+ test_pack_objects_reused 3 1 <in
'
test_expect_success 'omit delta from uninteresting base (cross pack)' '
@@ -189,15 +180,10 @@ test_expect_success 'omit delta from uninteresting base (cross pack)' '
git multi-pack-index write --bitmap --preferred-pack="pack-$P.idx" &&
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --delta-base-offset --all >/dev/null &&
-
packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" &&
objects_nr="$(git rev-list --count --all --objects)" &&
- test_pack_reused $(($objects_nr - 1)) <trace2.txt &&
- test_packs_reused $packs_nr <trace2.txt
+ test_pack_objects_reused_all $(($objects_nr - 1)) $packs_nr
'
test_done
--
2.43.0.334.gd4dbce1db5.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions
2024-01-16 19:03 ` [PATCH 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions Taylor Blau
@ 2024-01-17 7:32 ` Patrick Steinhardt
2024-02-05 22:44 ` Taylor Blau
0 siblings, 1 reply; 11+ messages in thread
From: Patrick Steinhardt @ 2024-01-17 7:32 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Jeff King, Junio C Hamano
[-- Attachment #1: Type: text/plain, Size: 2738 bytes --]
On Tue, Jan 16, 2024 at 02:03:44PM -0500, Taylor Blau wrote:
> Most of the tests in t5332 perform some setup before repeating a common
> refrain that looks like:
>
> : >trace2.txt &&
> GIT_TRACE2_EVENT="$PWD/trace2.txt" \
> git pack-objects --stdout --revs --all >/dev/null &&
>
> test_pack_reused $objects_nr <trace2.txt &&
> test_packs_reused $packs_nr <trace2.txt
>
> The next commit will add more tests which repeat the above refrain.
> Avoid duplicating this invocation even further and prepare for the
> following commit by wrapping the above in a helper function called
> `test_pack_objects_reused_all()`.
>
> Introduce another similar function `test_pack_objects_reused`, which
> expects to read a list of revisions over stdin for tests which need more
> fine-grained control of the contents of the pack they generate.
>
> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
> t/t5332-multi-pack-reuse.sh | 70 +++++++++++++++----------------------
> 1 file changed, 28 insertions(+), 42 deletions(-)
>
> diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh
> index 2ba788b042..b53e821bc0 100755
> --- a/t/t5332-multi-pack-reuse.sh
> +++ b/t/t5332-multi-pack-reuse.sh
> @@ -23,6 +23,26 @@ pack_position () {
> grep "$1" objects | cut -d" " -f1
> }
>
> +# test_pack_objects_reused_all <pack-reused> <packs-reused>
> +test_pack_objects_reused_all () {
> + : >trace2.txt &&
> + GIT_TRACE2_EVENT="$PWD/trace2.txt" \
> + git pack-objects --stdout --revs --all >/dev/null &&
> +
> + test_pack_reused "$1" <trace2.txt &&
> + test_packs_reused "$2" <trace2.txt
> +}
> +
> +# test_pack_objects_reused <pack-reused> <packs-reused>
> +test_pack_objects_reused () {
> + : >trace2.txt &&
> + GIT_TRACE2_EVENT="$PWD/trace2.txt" \
> + git pack-objects --stdout --revs >/dev/null &&
> +
> + test_pack_reused "$1" <trace2.txt &&
> + test_packs_reused "$2" <trace2.txt
> +}
> +
> test_expect_success 'preferred pack is reused for single-pack reuse' '
> test_config pack.allowPackReuse single &&
>
[snip]
> @@ -104,12 +110,7 @@ test_expect_success 'reuse objects from first pack with middle gap' '
> ^$(git rev-parse D)
> EOF
>
> - : >trace2.txt &&
> - GIT_TRACE2_EVENT="$PWD/trace2.txt" \
> - git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
> -
> - test_pack_reused 3 <trace2.txt &&
> - test_packs_reused 1 <trace2.txt
> + test_pack_objects_reused 3 1 <in
This conversion causes us to drop the `--delta-base-offset` flag. It
would be great to have an explanation in the commit message why it is
fine to drop it.
Other than that this looks like a nice simplification to me.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions
2024-01-17 7:32 ` Patrick Steinhardt
@ 2024-02-05 22:44 ` Taylor Blau
0 siblings, 0 replies; 11+ messages in thread
From: Taylor Blau @ 2024-02-05 22:44 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Jeff King, Junio C Hamano
On Wed, Jan 17, 2024 at 08:32:09AM +0100, Patrick Steinhardt wrote:
> > @@ -104,12 +110,7 @@ test_expect_success 'reuse objects from first pack with middle gap' '
> > ^$(git rev-parse D)
> > EOF
> >
> > - : >trace2.txt &&
> > - GIT_TRACE2_EVENT="$PWD/trace2.txt" \
> > - git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
> > -
> > - test_pack_reused 3 <trace2.txt &&
> > - test_packs_reused 1 <trace2.txt
> > + test_pack_objects_reused 3 1 <in
>
> This conversion causes us to drop the `--delta-base-offset` flag. It
> would be great to have an explanation in the commit message why it is
> fine to drop it.
Oops, that was unintentional. I'll resend a new round that fixes this
issue shortly.
> Other than that this looks like a nice simplification to me.
Thanks for the review!
Thanks,
Taylor
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 2/2] pack-objects: enable multi-pack reuse via `feature.experimental`
2024-01-16 19:03 [PATCH 0/2] pack-objects: enable multi-pack reuse via feature.experimental Taylor Blau
2024-01-16 19:03 ` [PATCH 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions Taylor Blau
@ 2024-01-16 19:03 ` Taylor Blau
2024-01-17 7:32 ` Patrick Steinhardt
2024-02-05 22:50 ` [PATCH v2 0/2] pack-objects: enable multi-pack reuse via feature.experimental Taylor Blau
2 siblings, 1 reply; 11+ messages in thread
From: Taylor Blau @ 2024-01-16 19:03 UTC (permalink / raw)
To: git; +Cc: Jeff King, Junio C Hamano
Now that multi-pack reuse is supported, enable it via the
feature.experimental configuration in addition to the classic
`pack.allowPackReuse`.
This will allow more users to experiment with the new behavior who might
not otherwise be aware of the existing `pack.allowPackReuse`
configuration option.
The enum with values NO_PACK_REUSE, SINGLE_PACK_REUSE, and
MULTI_PACK_REUSE is defined statically in builtin/pack-objects.c's
compilation unit. We could hoist that enum into a scope visible from the
repository_settings struct, and then use that enum value in
pack-objects. Instead, define a single int that indicates what
pack-objects's default value should be to avoid additional unnecessary
code movement.
Though `feature.experimental` implies `pack.allowPackReuse=multi`, this
can still be overridden by explicitly setting the latter configuration
to either "single" or "false". Tests covering all of these cases are
showin t5332.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
Documentation/config/feature.txt | 3 +++
builtin/pack-objects.c | 2 ++
repo-settings.c | 1 +
repository.h | 1 +
t/t5332-multi-pack-reuse.sh | 16 ++++++++++++++++
5 files changed, 23 insertions(+)
diff --git a/Documentation/config/feature.txt b/Documentation/config/feature.txt
index bf9546fca4..f061b64b74 100644
--- a/Documentation/config/feature.txt
+++ b/Documentation/config/feature.txt
@@ -17,6 +17,9 @@ skipping more commits at a time, reducing the number of round trips.
+
* `pack.useBitmapBoundaryTraversal=true` may improve bitmap traversal times by
walking fewer objects.
++
+* `pack.allowPackReuse=multi` may improve the time it takes to create a pack by
+reusing objects from multiple packs instead of just one.
feature.manyFiles::
Enable config options that optimize for repos with many files in the
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index d8c2128a97..329aeac804 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4396,6 +4396,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
prepare_repo_settings(the_repository);
if (sparse < 0)
sparse = the_repository->settings.pack_use_sparse;
+ if (the_repository->settings.pack_use_multi_pack_reuse)
+ allow_pack_reuse = MULTI_PACK_REUSE;
}
reset_pack_idx_option(&pack_idx_opts);
diff --git a/repo-settings.c b/repo-settings.c
index 30cd478762..a0b590bc6c 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -43,6 +43,7 @@ void prepare_repo_settings(struct repository *r)
if (experimental) {
r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
r->settings.pack_use_bitmap_boundary_traversal = 1;
+ r->settings.pack_use_multi_pack_reuse = 1;
}
if (manyfiles) {
r->settings.index_version = 4;
diff --git a/repository.h b/repository.h
index 5f18486f64..b92881b0a3 100644
--- a/repository.h
+++ b/repository.h
@@ -36,6 +36,7 @@ struct repo_settings {
int sparse_index;
int pack_read_reverse_index;
int pack_use_bitmap_boundary_traversal;
+ int pack_use_multi_pack_reuse;
/*
* Does this repository have core.useReplaceRefs=true (on by
diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh
index b53e821bc0..ccc8735db6 100755
--- a/t/t5332-multi-pack-reuse.sh
+++ b/t/t5332-multi-pack-reuse.sh
@@ -57,6 +57,22 @@ test_expect_success 'preferred pack is reused for single-pack reuse' '
test_pack_objects_reused_all 3 1
'
+test_expect_success 'multi-pack reuse is disabled by default' '
+ test_pack_objects_reused_all 3 1
+'
+
+test_expect_success 'feature.experimental implies multi-pack reuse' '
+ test_config feature.experimental true &&
+
+ test_pack_objects_reused_all 6 2
+'
+
+test_expect_success 'multi-pack reuse can be disabled with feature.experimental' '
+ test_config feature.experimental true &&
+ test_config pack.allowPackReuse single &&
+
+ test_pack_objects_reused_all 3 1
+'
test_expect_success 'enable multi-pack reuse' '
git config pack.allowPackReuse multi
--
2.43.0.334.gd4dbce1db5.dirty
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 2/2] pack-objects: enable multi-pack reuse via `feature.experimental`
2024-01-16 19:03 ` [PATCH 2/2] pack-objects: enable multi-pack reuse via `feature.experimental` Taylor Blau
@ 2024-01-17 7:32 ` Patrick Steinhardt
2024-02-05 22:48 ` Taylor Blau
0 siblings, 1 reply; 11+ messages in thread
From: Patrick Steinhardt @ 2024-01-17 7:32 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Jeff King, Junio C Hamano
[-- Attachment #1: Type: text/plain, Size: 2641 bytes --]
On Tue, Jan 16, 2024 at 02:03:47PM -0500, Taylor Blau wrote:
> Now that multi-pack reuse is supported, enable it via the
> feature.experimental configuration in addition to the classic
> `pack.allowPackReuse`.
>
> This will allow more users to experiment with the new behavior who might
> not otherwise be aware of the existing `pack.allowPackReuse`
> configuration option.
>
> The enum with values NO_PACK_REUSE, SINGLE_PACK_REUSE, and
> MULTI_PACK_REUSE is defined statically in builtin/pack-objects.c's
> compilation unit. We could hoist that enum into a scope visible from the
> repository_settings struct, and then use that enum value in
> pack-objects. Instead, define a single int that indicates what
> pack-objects's default value should be to avoid additional unnecessary
> code movement.
>
> Though `feature.experimental` implies `pack.allowPackReuse=multi`, this
> can still be overridden by explicitly setting the latter configuration
> to either "single" or "false". Tests covering all of these cases are
> showin t5332.
I do not mind adding more configs to `feature.experimental` because it
is the best mechanism we have for a staged rollout of features. It is
not ideal by any means as we have no way to tell how many people enable
this, or whether they hit any bugs. But we do not really have any
alternatives.
But one thing I would like to see is to have a clear plan for how
experimental features become stable. It's not a huge problem (yet)
because there are only two experimental features. One of them
("pack.useBitmapBoundaryTraversal=true") was recently added by you via
b0afdce5da (pack-bitmap.c: use commit boundary during bitmap traversal,
2023-05-08), which is perfectly fine. But the other one
("fetch.negotiationAlgorithm=skipping") has been added has been added
via b5651a2092 (experimental: default to fetch.writeCommitGraph=false,
2020-07-06), so it's been experimental for ~3.5 years by now.
I would like to avoid cases like this by laying out a plan for when
experimental features become the new default. It could be as simple as
"Let's wait N releases and then mark it stable". But having something
and documenting such a plan in our code makes it a lot more actionable
to promote those features to become stable eventually.
I know that this is not in any way specific to your patch, but I thought
this was a good opportunity to start this discussion. If we can agree on
my opinion then it would be great to add a comment to the experimental
feature to explain such an exit criterion.
Other than that this patch looks good to me, thanks!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/2] pack-objects: enable multi-pack reuse via `feature.experimental`
2024-01-17 7:32 ` Patrick Steinhardt
@ 2024-02-05 22:48 ` Taylor Blau
0 siblings, 0 replies; 11+ messages in thread
From: Taylor Blau @ 2024-02-05 22:48 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Jeff King, Junio C Hamano
On Wed, Jan 17, 2024 at 08:32:15AM +0100, Patrick Steinhardt wrote:
> I would like to avoid cases like this by laying out a plan for when
> experimental features become the new default. It could be as simple as
> "Let's wait N releases and then mark it stable". But having something
> and documenting such a plan in our code makes it a lot more actionable
> to promote those features to become stable eventually.
Fair point. I think that these have been mostly ad-hoc. Unfortunately,
when folks leave the project or change their attention to working on a
different feature, things that were left in feature.experimental may be
forgotten about.
When this inevitably happens, it would be nice to have a written record
(either in the repository, or here on the mailing list) so that other
folks can take up the mantle and graduate those feature(s) as
appropriate.
> I know that this is not in any way specific to your patch, but I thought
> this was a good opportunity to start this discussion. If we can agree on
> my opinion then it would be great to add a comment to the experimental
> feature to explain such an exit criterion.
I don't have a ton to add here for a graduation plan other than it would
be nice to enable it eventually, likely after a few releases without any
show-stopping bug reports.
> Other than that this patch looks good to me, thanks!
Thanks again for the review!
Thanks,
Taylor
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 0/2] pack-objects: enable multi-pack reuse via feature.experimental
2024-01-16 19:03 [PATCH 0/2] pack-objects: enable multi-pack reuse via feature.experimental Taylor Blau
2024-01-16 19:03 ` [PATCH 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions Taylor Blau
2024-01-16 19:03 ` [PATCH 2/2] pack-objects: enable multi-pack reuse via `feature.experimental` Taylor Blau
@ 2024-02-05 22:50 ` Taylor Blau
2024-02-05 22:50 ` [PATCH v2 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions Taylor Blau
2024-02-05 22:50 ` [PATCH v2 2/2] pack-objects: enable multi-pack reuse via `feature.experimental` Taylor Blau
2 siblings, 2 replies; 11+ messages in thread
From: Taylor Blau @ 2024-02-05 22:50 UTC (permalink / raw)
To: git; +Cc: Jeff King, Junio C Hamano, Patrick Steinhardt
Here is a small reroll of my series to teache pack-objects to perform
multi-pack reuse by way of the feature.experimental configuration.
Similar to last time, the hope is that this will expose this new feature
to more users who might not otherwise be aware of lesser-known
configuration options for pack-objects's internals.
The only changes since last time are as follows:
- Rebased forward onto 2a540e432f (The thirteenth batch, 2024-02-02).
- Re-introduced a missing `--delta-base-offset` argument into the new
helper function added and used by t5332.
Thanks in advance for your review!
Taylor Blau (2):
t5332-multi-pack-reuse.sh: extract pack-objects helper functions
pack-objects: enable multi-pack reuse via `feature.experimental`
Documentation/config/feature.txt | 3 ++
builtin/pack-objects.c | 2 +
repo-settings.c | 1 +
repository.h | 1 +
t/t5332-multi-pack-reuse.sh | 85 +++++++++++++++++---------------
5 files changed, 51 insertions(+), 41 deletions(-)
Range-diff against v1:
1: 94dd41e1af ! 1: db3d67bfa3 t5332-multi-pack-reuse.sh: extract pack-objects helper functions
@@ t/t5332-multi-pack-reuse.sh: pack_position () {
+test_pack_objects_reused_all () {
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
-+ git pack-objects --stdout --revs --all >/dev/null &&
++ git pack-objects --stdout --revs --all --delta-base-offset \
++ >/dev/null &&
+
+ test_pack_reused "$1" <trace2.txt &&
+ test_packs_reused "$2" <trace2.txt
2: a2d0af562a = 2: 683ffd154e pack-objects: enable multi-pack reuse via `feature.experimental`
base-commit: 2a540e432fe5dff3cfa9d3bf7ca56db2ad12ebb9
--
2.43.0.524.g683ffd154e
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions
2024-02-05 22:50 ` [PATCH v2 0/2] pack-objects: enable multi-pack reuse via feature.experimental Taylor Blau
@ 2024-02-05 22:50 ` Taylor Blau
2024-02-06 7:25 ` Patrick Steinhardt
2024-02-05 22:50 ` [PATCH v2 2/2] pack-objects: enable multi-pack reuse via `feature.experimental` Taylor Blau
1 sibling, 1 reply; 11+ messages in thread
From: Taylor Blau @ 2024-02-05 22:50 UTC (permalink / raw)
To: git; +Cc: Jeff King, Junio C Hamano, Patrick Steinhardt
Most of the tests in t5332 perform some setup before repeating a common
refrain that looks like:
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --revs --all >/dev/null &&
test_pack_reused $objects_nr <trace2.txt &&
test_packs_reused $packs_nr <trace2.txt
The next commit will add more tests which repeat the above refrain.
Avoid duplicating this invocation even further and prepare for the
following commit by wrapping the above in a helper function called
`test_pack_objects_reused_all()`.
Introduce another similar function `test_pack_objects_reused`, which
expects to read a list of revisions over stdin for tests which need more
fine-grained control of the contents of the pack they generate.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
t/t5332-multi-pack-reuse.sh | 71 +++++++++++++++----------------------
1 file changed, 29 insertions(+), 42 deletions(-)
diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh
index 2ba788b042..d516062f84 100755
--- a/t/t5332-multi-pack-reuse.sh
+++ b/t/t5332-multi-pack-reuse.sh
@@ -23,6 +23,27 @@ pack_position () {
grep "$1" objects | cut -d" " -f1
}
+# test_pack_objects_reused_all <pack-reused> <packs-reused>
+test_pack_objects_reused_all () {
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs --all --delta-base-offset \
+ >/dev/null &&
+
+ test_pack_reused "$1" <trace2.txt &&
+ test_packs_reused "$2" <trace2.txt
+}
+
+# test_pack_objects_reused <pack-reused> <packs-reused>
+test_pack_objects_reused () {
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs >/dev/null &&
+
+ test_pack_reused "$1" <trace2.txt &&
+ test_packs_reused "$2" <trace2.txt
+}
+
test_expect_success 'preferred pack is reused for single-pack reuse' '
test_config pack.allowPackReuse single &&
@@ -34,14 +55,10 @@ test_expect_success 'preferred pack is reused for single-pack reuse' '
git multi-pack-index write --bitmap &&
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --revs --all >/dev/null &&
-
- test_pack_reused 3 <trace2.txt &&
- test_packs_reused 1 <trace2.txt
+ test_pack_objects_reused_all 3 1
'
+
test_expect_success 'enable multi-pack reuse' '
git config pack.allowPackReuse multi
'
@@ -57,21 +74,11 @@ test_expect_success 'reuse all objects from subset of bitmapped packs' '
^$(git rev-parse A)
EOF
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --revs <in >/dev/null &&
-
- test_pack_reused 6 <trace2.txt &&
- test_packs_reused 2 <trace2.txt
+ test_pack_objects_reused 6 2 <in
'
test_expect_success 'reuse all objects from all packs' '
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --revs --all >/dev/null &&
-
- test_pack_reused 9 <trace2.txt &&
- test_packs_reused 3 <trace2.txt
+ test_pack_objects_reused_all 9 3
'
test_expect_success 'reuse objects from first pack with middle gap' '
@@ -104,12 +111,7 @@ test_expect_success 'reuse objects from first pack with middle gap' '
^$(git rev-parse D)
EOF
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
-
- test_pack_reused 3 <trace2.txt &&
- test_packs_reused 1 <trace2.txt
+ test_pack_objects_reused 3 1 <in
'
test_expect_success 'reuse objects from middle pack with middle gap' '
@@ -125,12 +127,7 @@ test_expect_success 'reuse objects from middle pack with middle gap' '
^$(git rev-parse D)
EOF
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
-
- test_pack_reused 3 <trace2.txt &&
- test_packs_reused 1 <trace2.txt
+ test_pack_objects_reused 3 1 <in
'
test_expect_success 'omit delta with uninteresting base (same pack)' '
@@ -160,10 +157,6 @@ test_expect_success 'omit delta with uninteresting base (same pack)' '
^$base
EOF
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
-
# We can only reuse the 3 objects corresponding to "other" from
# the latest pack.
#
@@ -175,8 +168,7 @@ test_expect_success 'omit delta with uninteresting base (same pack)' '
# The remaining objects from the other pack are similarly not
# reused because their objects are on the uninteresting side of
# the query.
- test_pack_reused 3 <trace2.txt &&
- test_packs_reused 1 <trace2.txt
+ test_pack_objects_reused 3 1 <in
'
test_expect_success 'omit delta from uninteresting base (cross pack)' '
@@ -189,15 +181,10 @@ test_expect_success 'omit delta from uninteresting base (cross pack)' '
git multi-pack-index write --bitmap --preferred-pack="pack-$P.idx" &&
- : >trace2.txt &&
- GIT_TRACE2_EVENT="$PWD/trace2.txt" \
- git pack-objects --stdout --delta-base-offset --all >/dev/null &&
-
packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" &&
objects_nr="$(git rev-list --count --all --objects)" &&
- test_pack_reused $(($objects_nr - 1)) <trace2.txt &&
- test_packs_reused $packs_nr <trace2.txt
+ test_pack_objects_reused_all $(($objects_nr - 1)) $packs_nr
'
test_done
--
2.43.0.524.g683ffd154e
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions
2024-02-05 22:50 ` [PATCH v2 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions Taylor Blau
@ 2024-02-06 7:25 ` Patrick Steinhardt
0 siblings, 0 replies; 11+ messages in thread
From: Patrick Steinhardt @ 2024-02-06 7:25 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Jeff King, Junio C Hamano, Patrick Steinhardt
[-- Attachment #1: Type: text/plain, Size: 3197 bytes --]
On Mon, Feb 05, 2024 at 05:50:19PM -0500, Taylor Blau wrote:
> Most of the tests in t5332 perform some setup before repeating a common
> refrain that looks like:
>
> : >trace2.txt &&
> GIT_TRACE2_EVENT="$PWD/trace2.txt" \
> git pack-objects --stdout --revs --all >/dev/null &&
>
> test_pack_reused $objects_nr <trace2.txt &&
> test_packs_reused $packs_nr <trace2.txt
>
> The next commit will add more tests which repeat the above refrain.
> Avoid duplicating this invocation even further and prepare for the
> following commit by wrapping the above in a helper function called
> `test_pack_objects_reused_all()`.
>
> Introduce another similar function `test_pack_objects_reused`, which
> expects to read a list of revisions over stdin for tests which need more
> fine-grained control of the contents of the pack they generate.
>
> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
> t/t5332-multi-pack-reuse.sh | 71 +++++++++++++++----------------------
> 1 file changed, 29 insertions(+), 42 deletions(-)
>
> diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh
> index 2ba788b042..d516062f84 100755
> --- a/t/t5332-multi-pack-reuse.sh
> +++ b/t/t5332-multi-pack-reuse.sh
> @@ -23,6 +23,27 @@ pack_position () {
> grep "$1" objects | cut -d" " -f1
> }
>
> +# test_pack_objects_reused_all <pack-reused> <packs-reused>
> +test_pack_objects_reused_all () {
> + : >trace2.txt &&
> + GIT_TRACE2_EVENT="$PWD/trace2.txt" \
> + git pack-objects --stdout --revs --all --delta-base-offset \
> + >/dev/null &&
> +
> + test_pack_reused "$1" <trace2.txt &&
> + test_packs_reused "$2" <trace2.txt
> +}
> +
> +# test_pack_objects_reused <pack-reused> <packs-reused>
> +test_pack_objects_reused () {
> + : >trace2.txt &&
> + GIT_TRACE2_EVENT="$PWD/trace2.txt" \
> + git pack-objects --stdout --revs >/dev/null &&
> +
> + test_pack_reused "$1" <trace2.txt &&
> + test_packs_reused "$2" <trace2.txt
> +}
> +
> test_expect_success 'preferred pack is reused for single-pack reuse' '
> test_config pack.allowPackReuse single &&
>
> @@ -34,14 +55,10 @@ test_expect_success 'preferred pack is reused for single-pack reuse' '
>
> git multi-pack-index write --bitmap &&
>
> - : >trace2.txt &&
> - GIT_TRACE2_EVENT="$PWD/trace2.txt" \
> - git pack-objects --stdout --revs --all >/dev/null &&
> -
> - test_pack_reused 3 <trace2.txt &&
> - test_packs_reused 1 <trace2.txt
> + test_pack_objects_reused_all 3 1
> '
Sorry for being nitpicky, but now we have the reverse problem where this
function adds `--delta-base-offset`. How about we adapt the helper
function so that it accepts trailing arguments like this:
```
test_pack_objects_reused () {
local pack_reused="$1"
local packs_reused="$2"
shift 2
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --revs "$@" >/dev/null &&
test_pack_reused "$pack_reused" <trace2.txt &&
test_packs_reused "$packs_reused" <trace2.txt
}
```
This merges the two helpers into a single one where callers can decide
by themselves which arguments to pass to git-pack-objects(1).
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 2/2] pack-objects: enable multi-pack reuse via `feature.experimental`
2024-02-05 22:50 ` [PATCH v2 0/2] pack-objects: enable multi-pack reuse via feature.experimental Taylor Blau
2024-02-05 22:50 ` [PATCH v2 1/2] t5332-multi-pack-reuse.sh: extract pack-objects helper functions Taylor Blau
@ 2024-02-05 22:50 ` Taylor Blau
1 sibling, 0 replies; 11+ messages in thread
From: Taylor Blau @ 2024-02-05 22:50 UTC (permalink / raw)
To: git; +Cc: Jeff King, Junio C Hamano, Patrick Steinhardt
Now that multi-pack reuse is supported, enable it via the
feature.experimental configuration in addition to the classic
`pack.allowPackReuse`.
This will allow more users to experiment with the new behavior who might
not otherwise be aware of the existing `pack.allowPackReuse`
configuration option.
The enum with values NO_PACK_REUSE, SINGLE_PACK_REUSE, and
MULTI_PACK_REUSE is defined statically in builtin/pack-objects.c's
compilation unit. We could hoist that enum into a scope visible from the
repository_settings struct, and then use that enum value in
pack-objects. Instead, define a single int that indicates what
pack-objects's default value should be to avoid additional unnecessary
code movement.
Though `feature.experimental` implies `pack.allowPackReuse=multi`, this
can still be overridden by explicitly setting the latter configuration
to either "single" or "false". Tests covering all of these cases are
showin t5332.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
Documentation/config/feature.txt | 3 +++
builtin/pack-objects.c | 2 ++
repo-settings.c | 1 +
repository.h | 1 +
t/t5332-multi-pack-reuse.sh | 16 ++++++++++++++++
5 files changed, 23 insertions(+)
diff --git a/Documentation/config/feature.txt b/Documentation/config/feature.txt
index bf9546fca4..f061b64b74 100644
--- a/Documentation/config/feature.txt
+++ b/Documentation/config/feature.txt
@@ -17,6 +17,9 @@ skipping more commits at a time, reducing the number of round trips.
+
* `pack.useBitmapBoundaryTraversal=true` may improve bitmap traversal times by
walking fewer objects.
++
+* `pack.allowPackReuse=multi` may improve the time it takes to create a pack by
+reusing objects from multiple packs instead of just one.
feature.manyFiles::
Enable config options that optimize for repos with many files in the
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index d8c2128a97..329aeac804 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4396,6 +4396,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
prepare_repo_settings(the_repository);
if (sparse < 0)
sparse = the_repository->settings.pack_use_sparse;
+ if (the_repository->settings.pack_use_multi_pack_reuse)
+ allow_pack_reuse = MULTI_PACK_REUSE;
}
reset_pack_idx_option(&pack_idx_opts);
diff --git a/repo-settings.c b/repo-settings.c
index 30cd478762..a0b590bc6c 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -43,6 +43,7 @@ void prepare_repo_settings(struct repository *r)
if (experimental) {
r->settings.fetch_negotiation_algorithm = FETCH_NEGOTIATION_SKIPPING;
r->settings.pack_use_bitmap_boundary_traversal = 1;
+ r->settings.pack_use_multi_pack_reuse = 1;
}
if (manyfiles) {
r->settings.index_version = 4;
diff --git a/repository.h b/repository.h
index 7a250a6605..21949c5a17 100644
--- a/repository.h
+++ b/repository.h
@@ -39,6 +39,7 @@ struct repo_settings {
int sparse_index;
int pack_read_reverse_index;
int pack_use_bitmap_boundary_traversal;
+ int pack_use_multi_pack_reuse;
/*
* Does this repository have core.useReplaceRefs=true (on by
diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh
index d516062f84..b925a81d37 100755
--- a/t/t5332-multi-pack-reuse.sh
+++ b/t/t5332-multi-pack-reuse.sh
@@ -58,6 +58,22 @@ test_expect_success 'preferred pack is reused for single-pack reuse' '
test_pack_objects_reused_all 3 1
'
+test_expect_success 'multi-pack reuse is disabled by default' '
+ test_pack_objects_reused_all 3 1
+'
+
+test_expect_success 'feature.experimental implies multi-pack reuse' '
+ test_config feature.experimental true &&
+
+ test_pack_objects_reused_all 6 2
+'
+
+test_expect_success 'multi-pack reuse can be disabled with feature.experimental' '
+ test_config feature.experimental true &&
+ test_config pack.allowPackReuse single &&
+
+ test_pack_objects_reused_all 3 1
+'
test_expect_success 'enable multi-pack reuse' '
git config pack.allowPackReuse multi
--
2.43.0.524.g683ffd154e
^ permalink raw reply related [flat|nested] 11+ messages in thread