From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: christian.couder@gmail.com, gitster@pobox.com,
johannes.schindelin@gmx.de, johncai86@gmail.com,
jonathantanmy@google.com, karthik.188@gmail.com,
kristofferhaugsbakk@fastmail.com, me@ttaylorr.com,
newren@gmail.com, peff@peff.net, ps@pks.im,
Derrick Stolee <stolee@gmail.com>,
Derrick Stolee <stolee@gmail.com>
Subject: [PATCH v3 10/13] pack-objects: refactor path-walk delta phase
Date: Fri, 16 May 2025 18:12:00 +0000 [thread overview]
Message-ID: <2c1d847987285ab98abb6a21d7dcd0bbed750d75.1747419124.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1819.v3.git.1747419124.gitgitgadget@gmail.com>
From: Derrick Stolee <stolee@gmail.com>
Previously, the --path-walk option to 'git pack-objects' would compute
deltas inline with the path-walk logic. This would make the progress
indicator look like it is taking a long time to enumerate objects, and
then very quickly computed deltas.
Instead of computing deltas on each region of objects organized by tree,
store a list of regions corresponding to these groups. These can later
be pulled from the list for delta compression before doing the "global"
delta search.
This presents a new progress indicator that can be used in tests to
verify that this stage is happening.
The current implementation is not integrated with threads, but we are
setting it up to arrive in the next change.
Since we do not attempt to sort objects by size until after exploring
all trees, we can remove the previous change to t5530 due to a different
error message appearing first.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
builtin/pack-objects.c | 83 +++++++++++++++++++++++++-----------
pack-objects.h | 12 ++++++
t/t5300-pack-object.sh | 8 +++-
t/t5530-upload-pack-error.sh | 6 ---
4 files changed, 75 insertions(+), 34 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index bdd20c074a9b..c7bf3fbc0267 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3237,6 +3237,51 @@ static int should_attempt_deltas(struct object_entry *entry)
return 1;
}
+static void find_deltas_for_region(struct object_entry *list,
+ struct packing_region *region,
+ unsigned int *processed)
+{
+ struct object_entry **delta_list;
+ unsigned int delta_list_nr = 0;
+
+ ALLOC_ARRAY(delta_list, region->nr);
+ for (size_t i = 0; i < region->nr; i++) {
+ struct object_entry *entry = list + region->start + i;
+ if (should_attempt_deltas(entry))
+ delta_list[delta_list_nr++] = entry;
+ }
+
+ QSORT(delta_list, delta_list_nr, type_size_sort);
+ find_deltas(delta_list, &delta_list_nr, window, depth, processed);
+ free(delta_list);
+}
+
+static void find_deltas_by_region(struct object_entry *list,
+ struct packing_region *regions,
+ size_t start, size_t nr)
+{
+ unsigned int processed = 0;
+ size_t progress_nr;
+
+ if (!nr)
+ return;
+
+ progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
+
+ if (progress)
+ progress_state = start_progress(the_repository,
+ _("Compressing objects by path"),
+ progress_nr);
+
+ while (nr--)
+ find_deltas_for_region(list,
+ ®ions[start++],
+ &processed);
+
+ display_progress(progress_state, progress_nr);
+ stop_progress(&progress_state);
+}
+
static void prepare_pack(int window, int depth)
{
struct object_entry **delta_list;
@@ -3261,6 +3306,10 @@ static void prepare_pack(int window, int depth)
if (!to_pack.nr_objects || !window || !depth)
return;
+ if (path_walk)
+ find_deltas_by_region(to_pack.objects, to_pack.regions,
+ 0, to_pack.nr_regions);
+
ALLOC_ARRAY(delta_list, to_pack.nr_objects);
nr_deltas = n = 0;
@@ -4214,10 +4263,8 @@ static int add_objects_by_path(const char *path,
enum object_type type,
void *data)
{
- struct object_entry **delta_list = NULL;
size_t oe_start = to_pack.nr_objects;
size_t oe_end;
- unsigned int sub_list_nr;
unsigned int *processed = data;
/*
@@ -4250,33 +4297,17 @@ static int add_objects_by_path(const char *path,
if (oe_end == oe_start || !window)
return 0;
- sub_list_nr = 0;
- if (oe_end > oe_start)
- ALLOC_ARRAY(delta_list, oe_end - oe_start);
+ ALLOC_GROW(to_pack.regions,
+ to_pack.nr_regions + 1,
+ to_pack.nr_regions_alloc);
- for (size_t i = 0; i < oe_end - oe_start; i++) {
- struct object_entry *entry = to_pack.objects + oe_start + i;
+ to_pack.regions[to_pack.nr_regions].start = oe_start;
+ to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
+ to_pack.nr_regions++;
- if (!should_attempt_deltas(entry))
- continue;
+ *processed += oids->nr;
+ display_progress(progress_state, *processed);
- delta_list[sub_list_nr++] = entry;
- }
-
- /*
- * Find delta bases among this list of objects that all match the same
- * path. This causes the delta compression to be interleaved in the
- * object walk, which can lead to confusing progress indicators. This is
- * also incompatible with threaded delta calculations. In the future,
- * consider creating a list of regions in the full to_pack.objects array
- * that could be picked up by the threaded delta computation.
- */
- if (sub_list_nr && window) {
- QSORT(delta_list, sub_list_nr, type_size_sort);
- find_deltas(delta_list, &sub_list_nr, window, depth, processed);
- }
-
- free(delta_list);
return 0;
}
diff --git a/pack-objects.h b/pack-objects.h
index d73e3843c92e..51e1ff6b95bf 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -119,11 +119,23 @@ struct object_entry {
unsigned ext_base:1; /* delta_idx points outside packlist */
};
+/**
+ * A packing region is a section of the packing_data.objects array
+ * as given by a starting index and a number of elements.
+ */
+struct packing_region {
+ size_t start;
+ size_t nr;
+};
+
struct packing_data {
struct repository *repo;
struct object_entry *objects;
uint32_t nr_objects, nr_alloc;
+ struct packing_region *regions;
+ size_t nr_regions, nr_regions_alloc;
+
int32_t *index;
uint32_t index_size;
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index 16420d128639..c8df6afd7844 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -725,7 +725,9 @@ test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompat
test_expect_success '--path-walk pack everything' '
git -C server rev-parse HEAD >in &&
- git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
+ GIT_PROGRESS_DELAY=0 git -C server pack-objects \
+ --stdout --revs --path-walk --progress <in >out.pack 2>err &&
+ grep "Compressing objects by path" err &&
git -C server index-pack --stdin <out.pack
'
@@ -734,7 +736,9 @@ test_expect_success '--path-walk thin pack' '
$(git -C server rev-parse HEAD)
^$(git -C server rev-parse HEAD~2)
EOF
- git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
+ GIT_PROGRESS_DELAY=0 git -C server pack-objects \
+ --thin --stdout --revs --path-walk --progress <in >out.pack 2>err &&
+ grep "Compressing objects by path" err &&
git -C server index-pack --fix-thin --stdin <out.pack
'
diff --git a/t/t5530-upload-pack-error.sh b/t/t5530-upload-pack-error.sh
index 8eb6fea839a6..558eedf25a4c 100755
--- a/t/t5530-upload-pack-error.sh
+++ b/t/t5530-upload-pack-error.sh
@@ -34,12 +34,6 @@ test_expect_success 'upload-pack fails due to error in pack-objects packing' '
hexsz=$(test_oid hexsz) &&
printf "%04xwant %s\n00000009done\n0000" \
$(($hexsz + 10)) $head >input &&
-
- # The current implementation of path-walk causes a different
- # error message. This will be changed by a future refactoring.
- GIT_TEST_PACK_PATH_WALK=0 &&
- export GIT_TEST_PACK_PATH_WALK &&
-
test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
test_grep "unable to read" output.err &&
test_grep "pack-objects died" output.err
--
gitgitgadget
next prev parent reply other threads:[~2025-05-16 18:12 UTC|newest]
Thread overview: 75+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-10 1:50 [PATCH 00/13] PATH WALK II: Add --path-walk option to 'git pack-objects' Derrick Stolee via GitGitGadget
2025-03-10 1:50 ` [PATCH 01/13] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2025-03-12 21:01 ` Taylor Blau
2025-03-20 19:48 ` Derrick Stolee
2025-03-10 1:50 ` [PATCH 02/13] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2025-03-12 21:14 ` Taylor Blau
2025-03-20 19:46 ` Derrick Stolee
2025-03-10 1:50 ` [PATCH 03/13] pack-objects: update usage to match docs Derrick Stolee via GitGitGadget
2025-03-12 21:14 ` Taylor Blau
2025-03-10 1:50 ` [PATCH 04/13] p5313: add performance tests for --path-walk Derrick Stolee via GitGitGadget
2025-03-10 1:50 ` [PATCH 05/13] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2025-03-10 1:50 ` [PATCH 06/13] t5538: add tests to confirm deltas in shallow pushes Derrick Stolee via GitGitGadget
2025-03-10 1:50 ` [PATCH 07/13] repack: add --path-walk option Derrick Stolee via GitGitGadget
2025-03-10 1:50 ` [PATCH 08/13] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2025-03-10 1:50 ` [PATCH 09/13] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2025-03-10 1:50 ` [PATCH 10/13] pack-objects: refactor path-walk delta phase Derrick Stolee via GitGitGadget
2025-03-12 21:21 ` Taylor Blau
2025-03-20 19:57 ` Derrick Stolee
2025-03-10 1:50 ` [PATCH 11/13] pack-objects: thread the path-based compression Derrick Stolee via GitGitGadget
2025-03-10 1:50 ` [PATCH 12/13] path-walk: add new 'edge_aggressive' option Derrick Stolee via GitGitGadget
2025-03-10 1:50 ` [PATCH 13/13] pack-objects: allow --shallow and --path-walk Derrick Stolee via GitGitGadget
2025-03-10 17:28 ` [PATCH 00/13] PATH WALK II: Add --path-walk option to 'git pack-objects' Junio C Hamano
2025-03-12 20:47 ` Taylor Blau
2025-03-20 20:18 ` Derrick Stolee
2025-03-24 15:22 ` [PATCH v2 " Derrick Stolee via GitGitGadget
2025-03-24 15:22 ` [PATCH v2 01/13] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2025-05-02 22:48 ` Taylor Blau
2025-03-24 15:22 ` [PATCH v2 02/13] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2025-05-02 23:21 ` Taylor Blau
2025-05-06 19:39 ` Derrick Stolee
2025-05-16 15:27 ` Derrick Stolee
2025-03-24 15:22 ` [PATCH v2 03/13] pack-objects: update usage to match docs Derrick Stolee via GitGitGadget
2025-03-24 15:22 ` [PATCH v2 04/13] p5313: add performance tests for --path-walk Derrick Stolee via GitGitGadget
2025-05-02 23:25 ` Taylor Blau
2025-03-24 15:22 ` [PATCH v2 05/13] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2025-05-02 23:31 ` Taylor Blau
2025-05-06 19:43 ` Derrick Stolee
2025-03-24 15:22 ` [PATCH v2 06/13] t5538: add tests to confirm deltas in shallow pushes Derrick Stolee via GitGitGadget
2025-05-02 23:34 ` Taylor Blau
2025-05-16 15:32 ` Derrick Stolee
2025-03-24 15:22 ` [PATCH v2 07/13] repack: add --path-walk option Derrick Stolee via GitGitGadget
2025-05-02 23:38 ` Taylor Blau
2025-03-24 15:22 ` [PATCH v2 08/13] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2025-05-02 23:42 ` Taylor Blau
2025-05-06 19:46 ` Derrick Stolee
2025-05-16 15:41 ` Derrick Stolee
2025-03-24 15:22 ` [PATCH v2 09/13] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2025-05-07 0:58 ` Taylor Blau
2025-03-24 15:22 ` [PATCH v2 10/13] pack-objects: refactor path-walk delta phase Derrick Stolee via GitGitGadget
2025-05-07 1:14 ` Taylor Blau
2025-05-16 16:27 ` Derrick Stolee
2025-05-29 0:17 ` Taylor Blau
2025-03-24 15:22 ` [PATCH v2 11/13] pack-objects: thread the path-based compression Derrick Stolee via GitGitGadget
2025-05-07 1:33 ` Taylor Blau
2025-03-24 15:22 ` [PATCH v2 12/13] path-walk: add new 'edge_aggressive' option Derrick Stolee via GitGitGadget
2025-03-24 15:22 ` [PATCH v2 13/13] pack-objects: allow --shallow and --path-walk Derrick Stolee via GitGitGadget
2025-05-02 21:24 ` [PATCH v2 00/13] PATH WALK II: Add --path-walk option to 'git pack-objects' Junio C Hamano
2025-05-02 22:45 ` Taylor Blau
2025-05-02 23:44 ` Taylor Blau
2025-05-07 1:35 ` Taylor Blau
2025-05-16 18:11 ` [PATCH v3 " Derrick Stolee via GitGitGadget
2025-05-16 18:11 ` [PATCH v3 01/13] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2025-05-16 18:11 ` [PATCH v3 02/13] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2025-05-16 18:11 ` [PATCH v3 03/13] pack-objects: update usage to match docs Derrick Stolee via GitGitGadget
2025-05-16 18:11 ` [PATCH v3 04/13] p5313: add performance tests for --path-walk Derrick Stolee via GitGitGadget
2025-05-16 18:11 ` [PATCH v3 05/13] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2025-05-16 18:11 ` [PATCH v3 06/13] t5538: add tests to confirm deltas in shallow pushes Derrick Stolee via GitGitGadget
2025-05-16 18:11 ` [PATCH v3 07/13] repack: add --path-walk option Derrick Stolee via GitGitGadget
2025-05-16 18:11 ` [PATCH v3 08/13] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2025-05-16 18:11 ` [PATCH v3 09/13] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2025-05-16 18:12 ` Derrick Stolee via GitGitGadget [this message]
2025-05-16 18:12 ` [PATCH v3 11/13] pack-objects: thread the path-based compression Derrick Stolee via GitGitGadget
2025-05-16 18:12 ` [PATCH v3 12/13] path-walk: add new 'edge_aggressive' option Derrick Stolee via GitGitGadget
2025-05-16 18:12 ` [PATCH v3 13/13] pack-objects: allow --shallow and --path-walk Derrick Stolee via GitGitGadget
2025-05-29 0:20 ` [PATCH v3 00/13] PATH WALK II: Add --path-walk option to 'git pack-objects' Taylor Blau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2c1d847987285ab98abb6a21d7dcd0bbed750d75.1747419124.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=christian.couder@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=johannes.schindelin@gmx.de \
--cc=johncai86@gmail.com \
--cc=jonathantanmy@google.com \
--cc=karthik.188@gmail.com \
--cc=kristofferhaugsbakk@fastmail.com \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
--cc=peff@peff.net \
--cc=ps@pks.im \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).