From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, johannes.schindelin@gmx.de, peff@peff.net,
ps@pks.im, me@ttaylorr.com, johncai86@gmail.com,
newren@gmail.com, christian.couder@gmail.com,
kristofferhaugsbakk@fastmail.com,
Derrick Stolee <stolee@gmail.com>,
Derrick Stolee <stolee@gmail.com>
Subject: [PATCH v2 06/17] path-walk: add prune_all_uninteresting option
Date: Sun, 20 Oct 2024 13:43:19 +0000 [thread overview]
Message-ID: <238d7d95715d3e161489a5ef8d788c0cac4f7a03.1729431810.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1813.v2.git.1729431810.gitgitgadget@gmail.com>
From: Derrick Stolee <stolee@gmail.com>
This option causes the path-walk API to act like the sparse tree-walk
algorithm implemented by mark_trees_uninteresting_sparse() in
list-objects.c.
Starting from the commits marked as UNINTERESTING, their root trees and
all objects reachable from those trees are UNINTERSTING, at least as we
walk path-by-path. When we reach a path where all objects associated
with that path are marked UNINTERESTING, then do no continue walking the
children of that path.
We need to be careful to pass the UNINTERESTING flag in a deep way on
the UNINTERESTING objects before we start the path-walk, or else the
depth-first search for the path-walk API may accidentally report some
objects as interesting.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
Documentation/technical/api-path-walk.txt | 8 +++
path-walk.c | 64 ++++++++++++++++++++++-
path-walk.h | 8 +++
t/helper/test-path-walk.c | 10 +++-
t/t6601-path-walk.sh | 40 +++++++++++---
5 files changed, 118 insertions(+), 12 deletions(-)
diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt
index 5fea1d1db17..c51f92cd649 100644
--- a/Documentation/technical/api-path-walk.txt
+++ b/Documentation/technical/api-path-walk.txt
@@ -57,6 +57,14 @@ commits are emitted.
While it is possible to walk only commits in this way, consumers would be
better off using the revision walk API instead.
+`prune_all_uninteresting`::
+ By default, all reachable paths are emitted by the path-walk API.
+ This option allows consumers to declare that they are not
+ interested in paths where all included objects are marked with the
+ `UNINTERESTING` flag. This requires using the `boundary` option in
+ the revision walk so that the walk emits commits marked with the
+ `UNINTERESTING` flag.
+
Examples
--------
diff --git a/path-walk.c b/path-walk.c
index 55758f50abd..910dfc6fdc9 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -22,6 +22,7 @@ struct type_and_oid_list
{
enum object_type type;
struct oid_array oids;
+ int maybe_interesting;
};
#define TYPE_AND_OID_LIST_INIT { \
@@ -124,6 +125,8 @@ static int add_children(struct path_walk_context *ctx,
strmap_put(&ctx->paths_to_lists, path.buf, list);
string_list_append(&ctx->path_stack, path.buf);
}
+ if (!(o->flags & UNINTERESTING))
+ list->maybe_interesting = 1;
oid_array_append(&list->oids, &entry.oid);
}
@@ -145,6 +148,40 @@ static int walk_path(struct path_walk_context *ctx,
list = strmap_get(&ctx->paths_to_lists, path);
+ if (ctx->info->prune_all_uninteresting) {
+ /*
+ * This is true if all objects were UNINTERESTING
+ * when added to the list.
+ */
+ if (!list->maybe_interesting)
+ return 0;
+
+ /*
+ * But it's still possible that the objects were set
+ * as UNINTERESTING after being added. Do a quick check.
+ */
+ list->maybe_interesting = 0;
+ for (size_t i = 0;
+ !list->maybe_interesting && i < list->oids.nr;
+ i++) {
+ if (list->type == OBJ_TREE) {
+ struct tree *t = lookup_tree(ctx->repo,
+ &list->oids.oid[i]);
+ if (t && !(t->object.flags & UNINTERESTING))
+ list->maybe_interesting = 1;
+ } else {
+ struct blob *b = lookup_blob(ctx->repo,
+ &list->oids.oid[i]);
+ if (b && !(b->object.flags & UNINTERESTING))
+ list->maybe_interesting = 1;
+ }
+ }
+
+ /* We have confirmed that all objects are UNINTERESTING. */
+ if (!list->maybe_interesting)
+ return 0;
+ }
+
/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs))
@@ -187,7 +224,7 @@ static void clear_strmap(struct strmap *map)
int walk_objects_by_path(struct path_walk_info *info)
{
const char *root_path = "";
- int ret = 0;
+ int ret = 0, has_uninteresting = 0;
size_t commits_nr = 0, paths_nr = 0;
struct commit *c;
struct type_and_oid_list *root_tree_list;
@@ -199,6 +236,7 @@ int walk_objects_by_path(struct path_walk_info *info)
.path_stack = STRING_LIST_INIT_DUP,
.paths_to_lists = STRMAP_INIT
};
+ struct oidset root_tree_set = OIDSET_INIT;
trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
@@ -211,6 +249,7 @@ int walk_objects_by_path(struct path_walk_info *info)
/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(root_tree_list, 1);
root_tree_list->type = OBJ_TREE;
+ root_tree_list->maybe_interesting = 1;
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
/*
@@ -306,10 +345,16 @@ int walk_objects_by_path(struct path_walk_info *info)
t = lookup_tree(info->revs->repo, oid);
if (t) {
+ if ((c->object.flags & UNINTERESTING)) {
+ t->object.flags |= UNINTERESTING;
+ has_uninteresting = 1;
+ }
+
if (t->object.flags & SEEN)
continue;
t->object.flags |= SEEN;
- oid_array_append(&root_tree_list->oids, oid);
+ if (!oidset_insert(&root_tree_set, oid))
+ oid_array_append(&root_tree_list->oids, oid);
} else {
warning("could not find tree %s", oid_to_hex(oid));
}
@@ -325,6 +370,21 @@ int walk_objects_by_path(struct path_walk_info *info)
oid_array_clear(&commit_list->oids);
free(commit_list);
+ /*
+ * Before performing a DFS of our paths and emitting them as interesting,
+ * do a full walk of the trees to distribute the UNINTERESTING bit. Use
+ * the sparse algorithm if prune_all_uninteresting was set.
+ */
+ if (has_uninteresting) {
+ trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo);
+ if (info->prune_all_uninteresting)
+ mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set);
+ else
+ mark_trees_uninteresting_dense(ctx.repo, &root_tree_set);
+ trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo);
+ }
+ oidset_clear(&root_tree_set);
+
string_list_append(&ctx.path_stack, root_path);
trace2_region_enter("path-walk", "path-walk", info->revs->repo);
diff --git a/path-walk.h b/path-walk.h
index 3f3b63180ef..3e44c4b8a58 100644
--- a/path-walk.h
+++ b/path-walk.h
@@ -38,6 +38,14 @@ struct path_walk_info {
int trees;
int blobs;
int tags;
+
+ /**
+ * When 'prune_all_uninteresting' is set and a path has all objects
+ * marked as UNINTERESTING, then the path-walk will not visit those
+ * objects. It will not call path_fn on those objects and will not
+ * walk the children of such trees.
+ */
+ int prune_all_uninteresting;
};
#define PATH_WALK_INFO_INIT { \
diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c
index c6c60d68749..06b103d8760 100644
--- a/t/helper/test-path-walk.c
+++ b/t/helper/test-path-walk.c
@@ -55,8 +55,12 @@ static int emit_block(const char *path, struct oid_array *oids,
BUG("we do not understand this type");
}
- for (size_t i = 0; i < oids->nr; i++)
- printf("%s:%s:%s\n", typestr, path, oid_to_hex(&oids->oid[i]));
+ for (size_t i = 0; i < oids->nr; i++) {
+ struct object *o = lookup_unknown_object(the_repository,
+ &oids->oid[i]);
+ printf("%s:%s:%s%s\n", typestr, path, oid_to_hex(&oids->oid[i]),
+ o->flags & UNINTERESTING ? ":UNINTERESTING" : "");
+ }
return 0;
}
@@ -76,6 +80,8 @@ int cmd__path_walk(int argc, const char **argv)
N_("toggle inclusion of tag objects")),
OPT_BOOL(0, "trees", &info.trees,
N_("toggle inclusion of tree objects")),
+ OPT_BOOL(0, "prune", &info.prune_all_uninteresting,
+ N_("toggle pruning of uninteresting paths")),
OPT_END(),
};
diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh
index 7758e2529ee..943adc6c8f1 100755
--- a/t/t6601-path-walk.sh
+++ b/t/t6601-path-walk.sh
@@ -229,19 +229,19 @@ test_expect_success 'topic, not base, boundary' '
cat >expect <<-EOF &&
COMMIT::$(git rev-parse topic)
- COMMIT::$(git rev-parse base~1)
+ COMMIT::$(git rev-parse base~1):UNINTERESTING
commits:2
TREE::$(git rev-parse topic^{tree})
- TREE::$(git rev-parse base~1^{tree})
- TREE:left/:$(git rev-parse base~1:left)
+ TREE::$(git rev-parse base~1^{tree}):UNINTERESTING
+ TREE:left/:$(git rev-parse base~1:left):UNINTERESTING
TREE:right/:$(git rev-parse topic:right)
- TREE:right/:$(git rev-parse base~1:right)
+ TREE:right/:$(git rev-parse base~1:right):UNINTERESTING
trees:5
- BLOB:a:$(git rev-parse base~1:a)
- BLOB:left/b:$(git rev-parse base~1:left/b)
- BLOB:right/c:$(git rev-parse base~1:right/c)
+ BLOB:a:$(git rev-parse base~1:a):UNINTERESTING
+ BLOB:left/b:$(git rev-parse base~1:left/b):UNINTERESTING
+ BLOB:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
BLOB:right/c:$(git rev-parse topic:right/c)
- BLOB:right/d:$(git rev-parse base~1:right/d)
+ BLOB:right/d:$(git rev-parse base~1:right/d):UNINTERESTING
blobs:5
tags:0
EOF
@@ -252,6 +252,30 @@ test_expect_success 'topic, not base, boundary' '
test_cmp expect.sorted out.sorted
'
+test_expect_success 'topic, not base, boundary with pruning' '
+ test-tool path-walk --prune -- --boundary topic --not base >out &&
+
+ cat >expect <<-EOF &&
+ COMMIT::$(git rev-parse topic)
+ COMMIT::$(git rev-parse base~1):UNINTERESTING
+ commits:2
+ TREE::$(git rev-parse topic^{tree})
+ TREE::$(git rev-parse base~1^{tree}):UNINTERESTING
+ TREE:right/:$(git rev-parse topic:right)
+ TREE:right/:$(git rev-parse base~1:right):UNINTERESTING
+ trees:4
+ BLOB:right/c:$(git rev-parse base~1:right/c):UNINTERESTING
+ BLOB:right/c:$(git rev-parse topic:right/c)
+ blobs:2
+ tags:0
+ EOF
+
+ sort expect >expect.sorted &&
+ sort out >out.sorted &&
+
+ test_cmp expect.sorted out.sorted
+'
+
test_expect_success 'trees are reported exactly once' '
test_when_finished "rm -rf unique-trees" &&
test_create_repo unique-trees &&
--
gitgitgadget
next prev parent reply other threads:[~2024-10-20 13:43 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-08 14:11 [PATCH 00/17] pack-objects: add --path-walk option for better deltas Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 01/17] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 02/17] t6601: add helper for testing path-walk API Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 03/17] path-walk: allow consumer to specify object types Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 04/17] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 05/17] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 06/17] path-walk: add prune_all_uninteresting option Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 07/17] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 08/17] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-28 19:54 ` Jonathan Tan
2024-10-29 18:07 ` Taylor Blau
2024-10-29 21:36 ` Jonathan Tan
2024-10-29 22:16 ` Taylor Blau
2024-10-31 2:04 ` Derrick Stolee
2024-10-31 2:14 ` Derrick Stolee
2024-10-31 21:02 ` Taylor Blau
2024-10-31 2:12 ` Derrick Stolee
2024-10-08 14:11 ` [PATCH 09/17] pack-objects: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 10/17] p5313: add performance tests for --path-walk Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 11/17] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 12/17] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 13/17] repack: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 14/17] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 15/17] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 16/17] pack-objects: refactor path-walk delta phase Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 17/17] pack-objects: thread the path-based compression Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 00/17] pack-objects: add --path-walk option for better deltas Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 01/17] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 02/17] t6601: add helper for testing path-walk API Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 03/17] path-walk: allow consumer to specify object types Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 04/17] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 05/17] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` Derrick Stolee via GitGitGadget [this message]
2024-10-20 13:43 ` [PATCH v2 07/17] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 08/17] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 09/17] pack-objects: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 10/17] p5313: add performance tests for --path-walk Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 11/17] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 12/17] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 13/17] repack: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 14/17] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 15/17] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 16/17] pack-objects: refactor path-walk delta phase Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 17/17] pack-objects: thread the path-based compression Derrick Stolee via GitGitGadget
2024-10-21 21:43 ` [PATCH v2 00/17] pack-objects: add --path-walk option for better deltas Taylor Blau
2024-10-24 13:29 ` Derrick Stolee
2024-10-24 15:52 ` Taylor Blau
2024-10-28 5:46 ` Patrick Steinhardt
2024-10-28 16:47 ` Taylor Blau
2024-10-28 17:13 ` Derrick Stolee
2024-10-28 17:25 ` Taylor Blau
2024-10-28 19:46 ` Derrick Stolee
2024-10-29 18:02 ` Taylor Blau
2024-10-31 2:28 ` Derrick Stolee
2024-10-31 21:07 ` Taylor Blau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=238d7d95715d3e161489a5ef8d788c0cac4f7a03.1729431810.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=christian.couder@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=johannes.schindelin@gmx.de \
--cc=johncai86@gmail.com \
--cc=kristofferhaugsbakk@fastmail.com \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
--cc=peff@peff.net \
--cc=ps@pks.im \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).