From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, johannes.schindelin@gmx.de, peff@peff.net,
ps@pks.im, me@ttaylorr.com, johncai86@gmail.com,
newren@gmail.com, christian.couder@gmail.com,
kristofferhaugsbakk@fastmail.com,
Derrick Stolee <stolee@gmail.com>,
Derrick Stolee <derrickstolee@github.com>
Subject: [PATCH 03/17] path-walk: allow consumer to specify object types
Date: Tue, 08 Oct 2024 14:11:49 +0000 [thread overview]
Message-ID: <14375d19392c406d903f6cb26be5a39c4c2ff1e9.1728396724.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1813.git.1728396723.gitgitgadget@gmail.com>
From: Derrick Stolee <derrickstolee@github.com>
We add the ability to filter the object types in the path-walk API so
the callback function is called fewer times.
This adds the ability to ask for the commits in a list, as well. Future
changes will add the ability to visit annotated tags.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
Documentation/technical/api-path-walk.txt | 9 +++
path-walk.c | 39 ++++++++++--
path-walk.h | 13 +++-
t/helper/test-path-walk.c | 17 +++++-
t/t6601-path-walk.sh | 72 +++++++++++++++++++++++
5 files changed, 141 insertions(+), 9 deletions(-)
diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt
index e588897ab8d..b7ae476ea0a 100644
--- a/Documentation/technical/api-path-walk.txt
+++ b/Documentation/technical/api-path-walk.txt
@@ -48,6 +48,15 @@ If you want the path-walk API to emit `UNINTERESTING` objects based on the
commit walk's boundary, be sure to set `revs.boundary` so the boundary
commits are emitted.
+`commits`, `blobs`, `trees`::
+ By default, these members are enabled and signal that the path-walk
+ API should call the `path_fn` on objects of these types. Specialized
+ applications could disable some options to make it simpler to walk
+ the objects or to have fewer calls to `path_fn`.
++
+While it is possible to walk only commits in this way, consumers would be
+better off using the revision walk API instead.
+
Examples
--------
diff --git a/path-walk.c b/path-walk.c
index 66840187e28..22e1aa13f31 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -84,6 +84,10 @@ static int add_children(struct path_walk_context *ctx,
if (S_ISGITLINK(entry.mode))
continue;
+ /* If the caller doesn't want blobs, then don't bother. */
+ if (!ctx->info->blobs && type == OBJ_BLOB)
+ continue;
+
if (type == OBJ_TREE) {
struct tree *child = lookup_tree(ctx->repo, &entry.oid);
o = child ? &child->object : NULL;
@@ -140,9 +144,11 @@ static int walk_path(struct path_walk_context *ctx,
list = strmap_get(&ctx->paths_to_lists, path);
- /* Evaluate function pointer on this data. */
- ret = ctx->info->path_fn(path, &list->oids, list->type,
- ctx->info->path_fn_data);
+ /* Evaluate function pointer on this data, if requested. */
+ if ((list->type == OBJ_TREE && ctx->info->trees) ||
+ (list->type == OBJ_BLOB && ctx->info->blobs))
+ ret = ctx->info->path_fn(path, &list->oids, list->type,
+ ctx->info->path_fn_data);
/* Expand data for children. */
if (list->type == OBJ_TREE) {
@@ -184,6 +190,7 @@ int walk_objects_by_path(struct path_walk_info *info)
size_t commits_nr = 0, paths_nr = 0;
struct commit *c;
struct type_and_oid_list *root_tree_list;
+ struct type_and_oid_list *commit_list;
struct path_walk_context ctx = {
.repo = info->revs->repo,
.revs = info->revs,
@@ -194,19 +201,32 @@ int walk_objects_by_path(struct path_walk_info *info)
trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
+ CALLOC_ARRAY(commit_list, 1);
+ commit_list->type = OBJ_COMMIT;
+
/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(root_tree_list, 1);
root_tree_list->type = OBJ_TREE;
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
-
if (prepare_revision_walk(info->revs))
die(_("failed to setup revision walk"));
while ((c = get_revision(info->revs))) {
- struct object_id *oid = get_commit_tree_oid(c);
- struct tree *t = lookup_tree(info->revs->repo, oid);
+ struct object_id *oid;
+ struct tree *t;
commits_nr++;
+ if (info->commits)
+ oid_array_append(&commit_list->oids,
+ &c->object.oid);
+
+ /* If we only care about commits, then skip trees. */
+ if (!info->trees && !info->blobs)
+ continue;
+
+ oid = get_commit_tree_oid(c);
+ t = lookup_tree(info->revs->repo, oid);
+
if (t) {
if (t->object.flags & SEEN)
continue;
@@ -220,6 +240,13 @@ int walk_objects_by_path(struct path_walk_info *info)
trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
trace2_region_leave("path-walk", "commit-walk", info->revs->repo);
+ /* Track all commits. */
+ if (info->commits)
+ ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT,
+ info->path_fn_data);
+ oid_array_clear(&commit_list->oids);
+ free(commit_list);
+
string_list_append(&ctx.path_stack, root_path);
trace2_region_enter("path-walk", "path-walk", info->revs->repo);
diff --git a/path-walk.h b/path-walk.h
index c9e94a98bc8..6ef372d8942 100644
--- a/path-walk.h
+++ b/path-walk.h
@@ -30,9 +30,20 @@ struct path_walk_info {
*/
path_fn path_fn;
void *path_fn_data;
+ /**
+ * Initialize which object types the path_fn should be called on. This
+ * could also limit the walk to skip blobs if not set.
+ */
+ int commits;
+ int trees;
+ int blobs;
};
-#define PATH_WALK_INFO_INIT { 0 }
+#define PATH_WALK_INFO_INIT { \
+ .blobs = 1, \
+ .trees = 1, \
+ .commits = 1, \
+}
/**
* Given the configuration of 'info', walk the commits based on 'info->revs' and
diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c
index 3c48f017fa0..37c5e3e31e8 100644
--- a/t/helper/test-path-walk.c
+++ b/t/helper/test-path-walk.c
@@ -18,6 +18,7 @@ static const char * const path_walk_usage[] = {
};
struct path_walk_test_data {
+ uintmax_t commit_nr;
uintmax_t tree_nr;
uintmax_t blob_nr;
};
@@ -29,6 +30,11 @@ static int emit_block(const char *path, struct oid_array *oids,
const char *typestr;
switch (type) {
+ case OBJ_COMMIT:
+ typestr = "COMMIT";
+ tdata->commit_nr += oids->nr;
+ break;
+
case OBJ_TREE:
typestr = "TREE";
tdata->tree_nr += oids->nr;
@@ -56,6 +62,12 @@ int cmd__path_walk(int argc, const char **argv)
struct path_walk_info info = PATH_WALK_INFO_INIT;
struct path_walk_test_data data = { 0 };
struct option options[] = {
+ OPT_BOOL(0, "blobs", &info.blobs,
+ N_("toggle inclusion of blob objects")),
+ OPT_BOOL(0, "commits", &info.commits,
+ N_("toggle inclusion of commit objects")),
+ OPT_BOOL(0, "trees", &info.trees,
+ N_("toggle inclusion of tree objects")),
OPT_END(),
};
@@ -78,9 +90,10 @@ int cmd__path_walk(int argc, const char **argv)
res = walk_objects_by_path(&info);
- printf("trees:%" PRIuMAX "\n"
+ printf("commits:%" PRIuMAX "\n"
+ "trees:%" PRIuMAX "\n"
"blobs:%" PRIuMAX "\n",
- data.tree_nr, data.blob_nr);
+ data.commit_nr, data.tree_nr, data.blob_nr);
return res;
}
diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh
index ca18b61c3f1..e4788664f93 100755
--- a/t/t6601-path-walk.sh
+++ b/t/t6601-path-walk.sh
@@ -31,6 +31,11 @@ test_expect_success 'all' '
test-tool path-walk -- --all >out &&
cat >expect <<-EOF &&
+ COMMIT::$(git rev-parse topic)
+ COMMIT::$(git rev-parse base)
+ COMMIT::$(git rev-parse base~1)
+ COMMIT::$(git rev-parse base~2)
+ commits:4
TREE::$(git rev-parse topic^{tree})
TREE::$(git rev-parse base^{tree})
TREE::$(git rev-parse base~1^{tree})
@@ -60,6 +65,10 @@ test_expect_success 'topic only' '
test-tool path-walk -- topic >out &&
cat >expect <<-EOF &&
+ COMMIT::$(git rev-parse topic)
+ COMMIT::$(git rev-parse base~1)
+ COMMIT::$(git rev-parse base~2)
+ commits:3
TREE::$(git rev-parse topic^{tree})
TREE::$(git rev-parse base~1^{tree})
TREE::$(git rev-parse base~2^{tree})
@@ -86,6 +95,8 @@ test_expect_success 'topic, not base' '
test-tool path-walk -- topic --not base >out &&
cat >expect <<-EOF &&
+ COMMIT::$(git rev-parse topic)
+ commits:1
TREE::$(git rev-parse topic^{tree})
TREE:left/:$(git rev-parse topic:left)
TREE:right/:$(git rev-parse topic:right)
@@ -103,10 +114,71 @@ test_expect_success 'topic, not base' '
test_cmp expect.sorted out.sorted
'
+test_expect_success 'topic, not base, only blobs' '
+ test-tool path-walk --no-trees --no-commits \
+ -- topic --not base >out &&
+
+ cat >expect <<-EOF &&
+ commits:0
+ trees:0
+ BLOB:a:$(git rev-parse topic:a)
+ BLOB:left/b:$(git rev-parse topic:left/b)
+ BLOB:right/c:$(git rev-parse topic:right/c)
+ BLOB:right/d:$(git rev-parse topic:right/d)
+ blobs:4
+ EOF
+
+ sort expect >expect.sorted &&
+ sort out >out.sorted &&
+
+ test_cmp expect.sorted out.sorted
+'
+
+# No, this doesn't make a lot of sense for the path-walk API,
+# but it is possible to do.
+test_expect_success 'topic, not base, only commits' '
+ test-tool path-walk --no-blobs --no-trees \
+ -- topic --not base >out &&
+
+ cat >expect <<-EOF &&
+ COMMIT::$(git rev-parse topic)
+ commits:1
+ trees:0
+ blobs:0
+ EOF
+
+ sort expect >expect.sorted &&
+ sort out >out.sorted &&
+
+ test_cmp expect.sorted out.sorted
+'
+
+test_expect_success 'topic, not base, only trees' '
+ test-tool path-walk --no-blobs --no-commits \
+ -- topic --not base >out &&
+
+ cat >expect <<-EOF &&
+ commits:0
+ TREE::$(git rev-parse topic^{tree})
+ TREE:left/:$(git rev-parse topic:left)
+ TREE:right/:$(git rev-parse topic:right)
+ trees:3
+ blobs:0
+ EOF
+
+ sort expect >expect.sorted &&
+ sort out >out.sorted &&
+
+ test_cmp expect.sorted out.sorted
+'
+
test_expect_success 'topic, not base, boundary' '
test-tool path-walk -- --boundary topic --not base >out &&
cat >expect <<-EOF &&
+ COMMIT::$(git rev-parse topic)
+ COMMIT::$(git rev-parse base~1)
+ commits:2
TREE::$(git rev-parse topic^{tree})
TREE::$(git rev-parse base~1^{tree})
TREE:left/:$(git rev-parse base~1:left)
--
gitgitgadget
next prev parent reply other threads:[~2024-10-08 14:12 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-08 14:11 [PATCH 00/17] pack-objects: add --path-walk option for better deltas Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 01/17] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 02/17] t6601: add helper for testing path-walk API Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` Derrick Stolee via GitGitGadget [this message]
2024-10-08 14:11 ` [PATCH 04/17] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 05/17] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 06/17] path-walk: add prune_all_uninteresting option Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 07/17] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 08/17] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-28 19:54 ` Jonathan Tan
2024-10-29 18:07 ` Taylor Blau
2024-10-29 21:36 ` Jonathan Tan
2024-10-29 22:16 ` Taylor Blau
2024-10-31 2:04 ` Derrick Stolee
2024-10-31 2:14 ` Derrick Stolee
2024-10-31 21:02 ` Taylor Blau
2024-10-31 2:12 ` Derrick Stolee
2024-10-08 14:11 ` [PATCH 09/17] pack-objects: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 10/17] p5313: add performance tests for --path-walk Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 11/17] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 12/17] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-08 14:11 ` [PATCH 13/17] repack: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 14/17] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 15/17] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 16/17] pack-objects: refactor path-walk delta phase Derrick Stolee via GitGitGadget
2024-10-08 14:12 ` [PATCH 17/17] pack-objects: thread the path-based compression Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 00/17] pack-objects: add --path-walk option for better deltas Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 01/17] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 02/17] t6601: add helper for testing path-walk API Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 03/17] path-walk: allow consumer to specify object types Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 04/17] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 05/17] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 06/17] path-walk: add prune_all_uninteresting option Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 07/17] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 08/17] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 09/17] pack-objects: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 10/17] p5313: add performance tests for --path-walk Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 11/17] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 12/17] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 13/17] repack: update usage to match docs Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 14/17] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 15/17] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 16/17] pack-objects: refactor path-walk delta phase Derrick Stolee via GitGitGadget
2024-10-20 13:43 ` [PATCH v2 17/17] pack-objects: thread the path-based compression Derrick Stolee via GitGitGadget
2024-10-21 21:43 ` [PATCH v2 00/17] pack-objects: add --path-walk option for better deltas Taylor Blau
2024-10-24 13:29 ` Derrick Stolee
2024-10-24 15:52 ` Taylor Blau
2024-10-28 5:46 ` Patrick Steinhardt
2024-10-28 16:47 ` Taylor Blau
2024-10-28 17:13 ` Derrick Stolee
2024-10-28 17:25 ` Taylor Blau
2024-10-28 19:46 ` Derrick Stolee
2024-10-29 18:02 ` Taylor Blau
2024-10-31 2:28 ` Derrick Stolee
2024-10-31 21:07 ` Taylor Blau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=14375d19392c406d903f6cb26be5a39c4c2ff1e9.1728396724.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=christian.couder@gmail.com \
--cc=derrickstolee@github.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=johannes.schindelin@gmx.de \
--cc=johncai86@gmail.com \
--cc=kristofferhaugsbakk@fastmail.com \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
--cc=peff@peff.net \
--cc=ps@pks.im \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).