From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, johannes.schindelin@gmx.de, peff@peff.net,
ps@pks.im, me@ttaylorr.com, johncai86@gmail.com,
newren@gmail.com, christian.couder@gmail.com,
kristofferhaugsbakk@fastmail.com,
Derrick Stolee <stolee@gmail.com>,
Derrick Stolee <stolee@gmail.com>
Subject: [PATCH v2 16/17] pack-objects: refactor path-walk delta phase
Date: Sun, 20 Oct 2024 13:43:29 +0000 [thread overview]
Message-ID: <030d8ec238e7692e029ae063e475f58e4c172542.1729431811.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1813.v2.git.1729431810.gitgitgadget@gmail.com>
From: Derrick Stolee <stolee@gmail.com>
Previously, the --path-walk option to 'git pack-objects' would compute
deltas inline with the path-walk logic. This would make the progress
indicator look like it is taking a long time to enumerate objects, and
then very quickly computed deltas.
Instead of computing deltas on each region of objects organized by tree,
store a list of regions corresponding to these groups. These can later
be pulled from the list for delta compression before doing the "global"
delta search.
This presents a new progress indicator that can be used in tests to
verify that this stage is happening.
The current implementation is not integrated with threads, but could be
done in a future update.
Since we do not attempt to sort objects by size until after exploring
all trees, we can remove the previous change to t5530 due to a different
error message appearing first.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
builtin/pack-objects.c | 81 +++++++++++++++++++++++++-----------
pack-objects.h | 12 ++++++
t/t5300-pack-object.sh | 8 +++-
t/t5530-upload-pack-error.sh | 6 ---
4 files changed, 74 insertions(+), 33 deletions(-)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 6805a55c60d..5c413ac07e6 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -3204,6 +3204,50 @@ static int should_attempt_deltas(struct object_entry *entry)
return 1;
}
+static void find_deltas_for_region(struct object_entry *list UNUSED,
+ struct packing_region *region,
+ unsigned int *processed)
+{
+ struct object_entry **delta_list;
+ uint32_t delta_list_nr = 0;
+
+ ALLOC_ARRAY(delta_list, region->nr);
+ for (uint32_t i = 0; i < region->nr; i++) {
+ struct object_entry *entry = to_pack.objects + region->start + i;
+ if (should_attempt_deltas(entry))
+ delta_list[delta_list_nr++] = entry;
+ }
+
+ QSORT(delta_list, delta_list_nr, type_size_sort);
+ find_deltas(delta_list, &delta_list_nr, window, depth, processed);
+ free(delta_list);
+}
+
+static void find_deltas_by_region(struct object_entry *list,
+ struct packing_region *regions,
+ uint32_t start, uint32_t nr)
+{
+ unsigned int processed = 0;
+ uint32_t progress_nr;
+
+ if (!nr)
+ return;
+
+ progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
+
+ if (progress)
+ progress_state = start_progress(_("Compressing objects by path"),
+ progress_nr);
+
+ while (nr--)
+ find_deltas_for_region(list,
+ ®ions[start++],
+ &processed);
+
+ display_progress(progress_state, progress_nr);
+ stop_progress(&progress_state);
+}
+
static void prepare_pack(int window, int depth)
{
struct object_entry **delta_list;
@@ -3228,6 +3272,10 @@ static void prepare_pack(int window, int depth)
if (!to_pack.nr_objects || !window || !depth)
return;
+ if (path_walk)
+ find_deltas_by_region(to_pack.objects, to_pack.regions,
+ 0, to_pack.nr_regions);
+
ALLOC_ARRAY(delta_list, to_pack.nr_objects);
nr_deltas = n = 0;
@@ -4165,10 +4213,8 @@ static int add_objects_by_path(const char *path,
enum object_type type,
void *data)
{
- struct object_entry **delta_list;
size_t oe_start = to_pack.nr_objects;
size_t oe_end;
- unsigned int sub_list_size;
unsigned int *processed = data;
/*
@@ -4201,32 +4247,17 @@ static int add_objects_by_path(const char *path,
if (oe_end == oe_start || !window)
return 0;
- sub_list_size = 0;
- ALLOC_ARRAY(delta_list, oe_end - oe_start);
+ ALLOC_GROW(to_pack.regions,
+ to_pack.nr_regions + 1,
+ to_pack.nr_regions_alloc);
- for (size_t i = 0; i < oe_end - oe_start; i++) {
- struct object_entry *entry = to_pack.objects + oe_start + i;
+ to_pack.regions[to_pack.nr_regions].start = oe_start;
+ to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
+ to_pack.nr_regions++;
- if (!should_attempt_deltas(entry))
- continue;
+ *processed += oids->nr;
+ display_progress(progress_state, *processed);
- delta_list[sub_list_size++] = entry;
- }
-
- /*
- * Find delta bases among this list of objects that all match the same
- * path. This causes the delta compression to be interleaved in the
- * object walk, which can lead to confusing progress indicators. This is
- * also incompatible with threaded delta calculations. In the future,
- * consider creating a list of regions in the full to_pack.objects array
- * that could be picked up by the threaded delta computation.
- */
- if (sub_list_size && window) {
- QSORT(delta_list, sub_list_size, type_size_sort);
- find_deltas(delta_list, &sub_list_size, window, depth, processed);
- }
-
- free(delta_list);
return 0;
}
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64b..bde4ba19755 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -118,11 +118,23 @@ struct object_entry {
unsigned ext_base:1; /* delta_idx points outside packlist */
};
+/**
+ * A packing region is a section of the packing_data.objects array
+ * as given by a starting index and a number of elements.
+ */
+struct packing_region {
+ uint32_t start;
+ uint32_t nr;
+};
+
struct packing_data {
struct repository *repo;
struct object_entry *objects;
uint32_t nr_objects, nr_alloc;
+ struct packing_region *regions;
+ uint32_t nr_regions, nr_regions_alloc;
+
int32_t *index;
uint32_t index_size;
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index 5f6914acae7..4f81613eab1 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -677,7 +677,9 @@ done
# Basic "repack everything" test
test_expect_success '--path-walk pack everything' '
git -C server rev-parse HEAD >in &&
- git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
+ GIT_PROGRESS_DELAY=0 git -C server pack-objects \
+ --stdout --revs --path-walk --progress <in >out.pack 2>err &&
+ grep "Compressing objects by path" err &&
git -C server index-pack --stdin <out.pack
'
@@ -687,7 +689,9 @@ test_expect_success '--path-walk thin pack' '
$(git -C server rev-parse HEAD)
^$(git -C server rev-parse HEAD~2)
EOF
- git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
+ GIT_PROGRESS_DELAY=0 git -C server pack-objects \
+ --thin --stdout --revs --path-walk --progress <in >out.pack 2>err &&
+ grep "Compressing objects by path" err &&
git -C server index-pack --fix-thin --stdin <out.pack
'
diff --git a/t/t5530-upload-pack-error.sh b/t/t5530-upload-pack-error.sh
index 356b96cb741..7172780d550 100755
--- a/t/t5530-upload-pack-error.sh
+++ b/t/t5530-upload-pack-error.sh
@@ -35,12 +35,6 @@ test_expect_success 'upload-pack fails due to error in pack-objects packing' '
hexsz=$(test_oid hexsz) &&
printf "%04xwant %s\n00000009done\n0000" \
$(($hexsz + 10)) $head >input &&
-
- # The current implementation of path-walk causes a different
- # error message. This will be changed by a future refactoring.
- GIT_TEST_PACK_PATH_WALK=0 &&
- export GIT_TEST_PACK_PATH_WALK &&
-
test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
test_grep "unable to read" output.err &&
test_grep "pack-objects died" output.err
--
gitgitgadget
next prev parent reply other threads:[~2024-10-20 13:43 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-08 14:11 [PATCH 00/17] pack-objects: add --path-walk option for better deltas Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 01/17] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 02/17] t6601: add helper for testing path-walk API Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 03/17] path-walk: allow consumer to specify object types Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 04/17] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 05/17] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 06/17] path-walk: add prune_all_uninteresting option Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 07/17] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 08/17] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-28 19:54 ` Jonathan Tan
2024-10-29 18:07 ` Taylor Blau
2024-10-29 21:36 ` Jonathan Tan
2024-10-29 22:16 ` Taylor Blau
2024-10-31 2:04 ` Derrick Stolee
2024-10-31 2:14 ` Derrick Stolee
2024-10-31 21:02 ` Taylor Blau
2024-10-31 2:12 ` Derrick Stolee
2024-10-08 14:11 ` [PATCH 09/17] pack-objects: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 10/17] p5313: add performance tests for --path-walk Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 11/17] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 12/17] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 13/17] repack: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 14/17] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 15/17] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 16/17] pack-objects: refactor path-walk delta phase Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 17/17] pack-objects: thread the path-based compression Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 00/17] pack-objects: add --path-walk option for better deltas Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 01/17] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 02/17] t6601: add helper for testing path-walk API Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 03/17] path-walk: allow consumer to specify object types Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 04/17] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 05/17] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 06/17] path-walk: add prune_all_uninteresting option Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 07/17] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 08/17] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 09/17] pack-objects: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 10/17] p5313: add performance tests for --path-walk Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 11/17] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 12/17] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 13/17] repack: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 14/17] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 15/17] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` Derrick Stolee via GitGitGadget [this message]
2024-10-20 13:43 ` [PATCH v2 17/17] pack-objects: thread the path-based compression Derrick Stolee via GitGitGadget
2024-10-21 21:43 ` [PATCH v2 00/17] pack-objects: add --path-walk option for better deltas Taylor Blau
2024-10-24 13:29 ` Derrick Stolee
2024-10-24 15:52 ` Taylor Blau
2024-10-28 5:46 ` Patrick Steinhardt
2024-10-28 16:47 ` Taylor Blau
2024-10-28 17:13 ` Derrick Stolee
2024-10-28 17:25 ` Taylor Blau
2024-10-28 19:46 ` Derrick Stolee
2024-10-29 18:02 ` Taylor Blau
2024-10-31 2:28 ` Derrick Stolee
2024-10-31 21:07 ` Taylor Blau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=030d8ec238e7692e029ae063e475f58e4c172542.1729431811.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=christian.couder@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=johannes.schindelin@gmx.de \
--cc=johncai86@gmail.com \
--cc=kristofferhaugsbakk@fastmail.com \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
--cc=peff@peff.net \
--cc=ps@pks.im \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).