From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, johannes.schindelin@gmx.de, peff@peff.net,
ps@pks.im, me@ttaylorr.com, johncai86@gmail.com,
newren@gmail.com, Derrick Stolee <stolee@gmail.com>,
Derrick Stolee <stolee@gmail.com>
Subject: [PATCH 19/30] path-walk: add prune_all_uninteresting option
Date: Tue, 10 Sep 2024 02:28:44 +0000 [thread overview]
Message-ID: <78168d98bfc0df7151eac5280e12b95c9fb694ec.1725935335.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1786.git.1725935335.gitgitgadget@gmail.com>
From: Derrick Stolee <stolee@gmail.com>
This option causes the path-walk API to act like the sparse tree-walk
algorithm implemented by mark_trees_uninteresting_sparse() in
list-objects.c.
Starting from the commits marked as UNINTERESTING, their root trees and
all objects reachable from those trees are UNINTERSTING, at least as we
walk path-by-path. When we reach a path where all objects associated
with that path are marked UNINTERESTING, then do no continue walking the
children of that path.
We need to be careful to pass the UNINTERESTING flag in a deep way on
the UNINTERESTING objects before we start the path-walk, or else the
depth-first search for the path-walk API may accidentally report some
objects as interesting.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
path-walk.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++---
path-walk.h | 8 +++++++
2 files changed, 73 insertions(+), 3 deletions(-)
diff --git a/path-walk.c b/path-walk.c
index 65f9856afa2..08de29614f7 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -23,6 +23,7 @@ struct type_and_oid_list
{
enum object_type type;
struct oid_array oids;
+ int maybe_interesting;
};
#define TYPE_AND_OID_LIST_INIT { \
@@ -139,6 +140,9 @@ static int add_children(struct path_walk_context *ctx,
list->type = type;
strmap_put(&ctx->paths_to_lists, path.buf, list);
string_list_append(&ctx->path_stack, path.buf);
+
+ if (!(o->flags & UNINTERESTING))
+ list->maybe_interesting = 1;
}
oid_array_append(&list->oids, &entry.oid);
}
@@ -161,6 +165,40 @@ static int walk_path(struct path_walk_context *ctx,
list = strmap_get(&ctx->paths_to_lists, path);
+ if (ctx->info->prune_all_uninteresting) {
+ /*
+ * This is true if all objects were UNINTERESTING
+ * when added to the list.
+ */
+ if (!list->maybe_interesting)
+ return 0;
+
+ /*
+ * But it's still possible that the objects were set
+ * as UNINTERESTING after being added. Do a quick check.
+ */
+ list->maybe_interesting = 0;
+ for (size_t i = 0;
+ !list->maybe_interesting && i < list->oids.nr;
+ i++) {
+ if (list->type == OBJ_TREE) {
+ struct tree *t = lookup_tree(ctx->repo,
+ &list->oids.oid[i]);
+ if (t && !(t->object.flags & UNINTERESTING))
+ list->maybe_interesting = 1;
+ } else {
+ struct blob *b = lookup_blob(ctx->repo,
+ &list->oids.oid[i]);
+ if (b && !(b->object.flags & UNINTERESTING))
+ list->maybe_interesting = 1;
+ }
+ }
+
+ /* We have confirmed that all objects are UNINTERESTING. */
+ if (!list->maybe_interesting)
+ return 0;
+ }
+
/* Evaluate function pointer on this data, if requested. */
if ((list->type == OBJ_TREE && ctx->info->trees) ||
(list->type == OBJ_BLOB && ctx->info->blobs))
@@ -203,7 +241,7 @@ static void clear_strmap(struct strmap *map)
int walk_objects_by_path(struct path_walk_info *info)
{
const char *root_path = "";
- int ret = 0;
+ int ret = 0, has_uninteresting = 0;
size_t commits_nr = 0, paths_nr = 0;
struct commit *c;
struct type_and_oid_list *root_tree_list;
@@ -215,6 +253,7 @@ int walk_objects_by_path(struct path_walk_info *info)
.path_stack = STRING_LIST_INIT_DUP,
.paths_to_lists = STRMAP_INIT
};
+ struct oidset root_tree_set = OIDSET_INIT;
struct oid_array tagged_tree_list = OID_ARRAY_INIT;
struct oid_array tagged_blob_list = OID_ARRAY_INIT;
@@ -227,7 +266,9 @@ int walk_objects_by_path(struct path_walk_info *info)
/* Insert a single list for the root tree into the paths. */
CALLOC_ARRAY(root_tree_list, 1);
root_tree_list->type = OBJ_TREE;
+ root_tree_list->maybe_interesting = 1;
strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
+
if (prepare_revision_walk(info->revs))
die(_("failed to setup revision walk"));
@@ -247,11 +288,17 @@ int walk_objects_by_path(struct path_walk_info *info)
oid = get_commit_tree_oid(c);
t = lookup_tree(info->revs->repo, oid);
- if (t)
+ if (t) {
+ oidset_insert(&root_tree_set, oid);
oid_array_append(&root_tree_list->oids, oid);
- else
+ } else {
warning("could not find tree %s", oid_to_hex(oid));
+ }
+ if (t && (c->object.flags & UNINTERESTING)) {
+ t->object.flags |= UNINTERESTING;
+ has_uninteresting = 1;
+ }
}
trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
@@ -318,6 +365,21 @@ int walk_objects_by_path(struct path_walk_info *info)
oid_array_clear(&tagged_blob_list);
}
+ /*
+ * Before performing a DFS of our paths and emitting them as interesting,
+ * do a full walk of the trees to distribute the UNINTERESTING bit. Use
+ * the sparse algorithm if prune_all_uninteresting was set.
+ */
+ if (has_uninteresting) {
+ trace2_region_enter("path-walk", "uninteresting-walk", info->revs->repo);
+ if (info->prune_all_uninteresting)
+ mark_trees_uninteresting_sparse(ctx.repo, &root_tree_set);
+ else
+ mark_trees_uninteresting_dense(ctx.repo, &root_tree_set);
+ trace2_region_leave("path-walk", "uninteresting-walk", info->revs->repo);
+ }
+ oidset_clear(&root_tree_set);
+
string_list_append(&ctx.path_stack, root_path);
trace2_region_enter("path-walk", "path-walk", info->revs->repo);
diff --git a/path-walk.h b/path-walk.h
index 637d3b0cabb..7c02bca7156 100644
--- a/path-walk.h
+++ b/path-walk.h
@@ -50,6 +50,14 @@ struct path_walk_info {
* the sparse-checkout patterns.
*/
struct pattern_list *pl;
+
+ /**
+ * When 'prune_all_uninteresting' is set and a path has all objects
+ * marked as UNINTERESTING, then the path-walk will not visit those
+ * objects. It will not call path_fn on those objects and will not
+ * walk the children of such trees.
+ */
+ int prune_all_uninteresting;
};
#define PATH_WALK_INFO_INIT { \
--
gitgitgadget
next prev parent reply other threads:[~2024-09-10 2:29 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-10 2:28 [PATCH 00/30] [RFC] Path-walk API and applications Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 01/30] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 02/30] backfill: add builtin boilerplate Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 03/30] backfill: basic functionality and tests Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 04/30] backfill: add --batch-size=<n> option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 05/30] backfill: add --sparse option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 06/30] backfill: assume --sparse when sparse-checkout is enabled Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 07/30] path-walk: allow consumer to specify object types Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 08/30] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 09/30] survey: stub in new experimental `git-survey` command Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 10/30] survey: add command line opts to select references Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 11/30] survey: collect the set of requested refs Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 12/30] survey: start pretty printing data in table form Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 13/30] survey: add object count summary Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 14/30] survey: summarize total sizes by object type Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 15/30] survey: show progress during object walk Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 16/30] survey: add ability to track prioritized lists Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 17/30] survey: add report of "largest" paths Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 18/30] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` Derrick Stolee via GitGitGadget [this message]
2024-09-10 2:28 ` [PATCH 20/30] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 21/30] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 22/30] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 23/30] p5313: add size comparison test Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 24/30] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 25/30] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 26/30] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 27/30] pack-objects: add --full-name-hash option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 28/30] test-name-hash: add helper to compute name-hash functions Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 29/30] p5314: add a size test for name-hash collisions Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 30/30] pack-objects: output debug info about deltas Derrick Stolee via GitGitGadget
2024-09-11 21:32 ` [PATCH 00/30] [RFC] Path-walk API and applications Junio C Hamano
2024-09-17 10:41 ` Christian Couder
2024-09-18 23:18 ` Derrick Stolee
2024-09-22 18:37 ` Junio C Hamano
2024-09-23 1:22 ` Derrick Stolee
2024-09-23 16:56 ` Junio C Hamano
2024-09-22 21:08 ` Kristoffer Haugsbakk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=78168d98bfc0df7151eac5280e12b95c9fb694ec.1725935335.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=johannes.schindelin@gmx.de \
--cc=johncai86@gmail.com \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
--cc=peff@peff.net \
--cc=ps@pks.im \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).