From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, johannes.schindelin@gmx.de, peff@peff.net,
ps@pks.im, me@ttaylorr.com, johncai86@gmail.com,
newren@gmail.com, Derrick Stolee <stolee@gmail.com>,
Derrick Stolee <derrickstolee@github.com>
Subject: [PATCH 07/30] path-walk: allow consumer to specify object types
Date: Tue, 10 Sep 2024 02:28:32 +0000 [thread overview]
Message-ID: <2829fe3875438f3a9907f36d825d6c24952abded.1725935335.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1786.git.1725935335.gitgitgadget@gmail.com>
From: Derrick Stolee <derrickstolee@github.com>
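Allow the consumer of the path-walk API to choose which object types
(commits, trees, blobs) its path_fn is called on. This also adds the
ability to ask for the commits as a single list. Since only blobs are
requested by default, the callback in 'git backfill' now treats any
other object type as a BUG() instead of silently ignoring it.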
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
builtin/backfill.c | 2 +-
path-walk.c | 40 ++++++++++++++++++++++++++++++++++------
path-walk.h | 12 +++++++++++-
3 files changed, 46 insertions(+), 8 deletions(-)
diff --git a/builtin/backfill.c b/builtin/backfill.c
index 82a18e58a41..2a1b043f188 100644
--- a/builtin/backfill.c
+++ b/builtin/backfill.c
@@ -61,7 +61,7 @@ static int fill_missing_blobs(const char *path,
struct backfill_context *ctx = data;
if (type != OBJ_BLOB)
- return 0;
+ BUG("fill_missing_blobs only takes blob objects");
for (size_t i = 0; i < list->nr; i++) {
off_t size = 0;
diff --git a/path-walk.c b/path-walk.c
index dc2390dd9ea..d70e6840fb5 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -83,6 +83,10 @@ static int add_children(struct path_walk_context *ctx,
if (S_ISGITLINK(entry.mode))
continue;
+ /* If the caller doesn't want blobs, then don't bother. */
+ if (!ctx->info->blobs && type == OBJ_BLOB)
+ continue;
+
if (type == OBJ_TREE) {
struct tree *child = lookup_tree(ctx->repo, &entry.oid);
o = child ? &child->object : NULL;
@@ -156,9 +160,11 @@ static int walk_path(struct path_walk_context *ctx,
list = strmap_get(&ctx->paths_to_lists, path);
- /* Evaluate function pointer on this data. */
- ret = ctx->info->path_fn(path, &list->oids, list->type,
- ctx->info->path_fn_data);
+ /* Evaluate function pointer on this data, if requested. */
+ if ((list->type == OBJ_TREE && ctx->info->trees) ||
+ (list->type == OBJ_BLOB && ctx->info->blobs))
+ ret = ctx->info->path_fn(path, &list->oids, list->type,
+ ctx->info->path_fn_data);
/* Expand data for children. */
if (list->type == OBJ_TREE) {
@@ -200,6 +206,7 @@ int walk_objects_by_path(struct path_walk_info *info)
size_t commits_nr = 0, paths_nr = 0;
struct commit *c;
struct type_and_oid_list *root_tree_list;
+ struct type_and_oid_list *commit_list;
struct path_walk_context ctx = {
.repo = info->revs->repo,
.revs = info->revs,
@@ -210,28 +217,49 @@ int walk_objects_by_path(struct path_walk_info *info)
trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
+ CALLOC_ARRAY(commit_list, 1);
+ commit_list->type = OBJ_COMMIT;
+
/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(root_tree_list, 1);
root_tree_list->type = OBJ_TREE;
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
-
if (prepare_revision_walk(info->revs))
die(_("failed to setup revision walk"));
while ((c = get_revision(info->revs))) {
- struct object_id *oid = get_commit_tree_oid(c);
- struct tree *t = lookup_tree(info->revs->repo, oid);
+ struct object_id *oid;
+ struct tree *t;
commits_nr++;
+ if (info->commits)
+ oid_array_append(&commit_list->oids,
+ &c->object.oid);
+
+ /* If we only care about commits, then skip trees. */
+ if (!info->trees && !info->blobs)
+ continue;
+
+ oid = get_commit_tree_oid(c);
+ t = lookup_tree(info->revs->repo, oid);
+
if (t)
oid_array_append(&root_tree_list->oids, oid);
else
warning("could not find tree %s", oid_to_hex(oid));
+
}
trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
trace2_region_leave("path-walk", "commit-walk", info->revs->repo);
+ /* Track all commits. */
+ if (info->commits)
+ ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT,
+ info->path_fn_data);
+ oid_array_clear(&commit_list->oids);
+ free(commit_list);
+
string_list_append(&ctx.path_stack, root_path);
trace2_region_enter("path-walk", "path-walk", info->revs->repo);
diff --git a/path-walk.h b/path-walk.h
index bc1ebba5081..49b982dade6 100644
--- a/path-walk.h
+++ b/path-walk.h
@@ -32,6 +32,14 @@ struct path_walk_info {
path_fn path_fn;
void *path_fn_data;
+ /**
+ * Select which object types the path_fn is called on. When 'blobs' is
+ * unset, the walk also skips blob entries instead of collecting them.
+ */
+ int commits;
+ int trees;
+ int blobs;
+
/**
* Specify a sparse-checkout definition to match our paths to. Do not
* walk outside of this sparse definition. If the patterns are in
@@ -43,7 +51,9 @@ struct path_walk_info {
struct pattern_list *pl;
};
-#define PATH_WALK_INFO_INIT { 0 }
+#define PATH_WALK_INFO_INIT { \
+ .blobs = 1, \
+}
/**
* Given the configuration of 'info', walk the commits based on 'info->revs' and
--
gitgitgadget
next prev parent reply other threads:[~2024-09-10 2:29 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-10 2:28 [PATCH 00/30] [RFC] Path-walk API and applications Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 01/30] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 02/30] backfill: add builtin boilerplate Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 03/30] backfill: basic functionality and tests Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 04/30] backfill: add --batch-size=<n> option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 05/30] backfill: add --sparse option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 06/30] backfill: assume --sparse when sparse-checkout is enabled Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` Derrick Stolee via GitGitGadget [this message]
2024-09-10 2:28 ` [PATCH 08/30] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 09/30] survey: stub in new experimental `git-survey` command Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 10/30] survey: add command line opts to select references Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 11/30] survey: collect the set of requested refs Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 12/30] survey: start pretty printing data in table form Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 13/30] survey: add object count summary Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 14/30] survey: summarize total sizes by object type Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 15/30] survey: show progress during object walk Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 16/30] survey: add ability to track prioritized lists Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 17/30] survey: add report of "largest" paths Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 18/30] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 19/30] path-walk: add prune_all_uninteresting option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 20/30] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 21/30] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 22/30] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 23/30] p5313: add size comparison test Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 24/30] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 25/30] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 26/30] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 27/30] pack-objects: add --full-name-hash option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 28/30] test-name-hash: add helper to compute name-hash functions Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 29/30] p5314: add a size test for name-hash collisions Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 30/30] pack-objects: output debug info about deltas Derrick Stolee via GitGitGadget
2024-09-11 21:32 ` [PATCH 00/30] [RFC] Path-walk API and applications Junio C Hamano
2024-09-17 10:41 ` Christian Couder
2024-09-18 23:18 ` Derrick Stolee
2024-09-22 18:37 ` Junio C Hamano
2024-09-23 1:22 ` Derrick Stolee
2024-09-23 16:56 ` Junio C Hamano
2024-09-22 21:08 ` Kristoffer Haugsbakk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2829fe3875438f3a9907f36d825d6c24952abded.1725935335.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=derrickstolee@github.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=johannes.schindelin@gmx.de \
--cc=johncai86@gmail.com \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
--cc=peff@peff.net \
--cc=ps@pks.im \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).