From: "Jeff Hostetler via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, johannes.schindelin@gmx.de, peff@peff.net,
ps@pks.im, me@ttaylorr.com, johncai86@gmail.com,
newren@gmail.com, Derrick Stolee <stolee@gmail.com>,
Jeff Hostetler <jeffhostetler@github.com>
Subject: [PATCH 11/30] survey: collect the set of requested refs
Date: Tue, 10 Sep 2024 02:28:36 +0000 [thread overview]
Message-ID: <efa1793a5729b152b8961238dd834a26275e969a.1725935335.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1786.git.1725935335.gitgitgadget@gmail.com>
From: Jeff Hostetler <jeffhostetler@github.com>
Collect the set of requested branches, tags, etc. into a ref_array and
collect the set of requested patterns into a strvec.
RFC TODO: This patch has some changes that should be in the previous patch,
to make the diff look a lot better.
Co-authored-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Jeff Hostetler <jeffhostetler@github.com>
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
builtin/survey.c | 258 ++++++++++++++++++++++++++++++++++--------
t/t8100-git-survey.sh | 9 ++
2 files changed, 217 insertions(+), 50 deletions(-)
diff --git a/builtin/survey.c b/builtin/survey.c
index e0e844201de..1b4fe591e59 100644
--- a/builtin/survey.c
+++ b/builtin/survey.c
@@ -1,6 +1,12 @@
#include "builtin.h"
#include "config.h"
+#include "object.h"
+#include "object-store-ll.h"
#include "parse-options.h"
+#include "progress.h"
+#include "ref-filter.h"
+#include "strvec.h"
+#include "trace2.h"
static const char * const survey_usage[] = {
N_("(EXPERIMENTAL!) git survey <options>"),
@@ -17,18 +23,8 @@ struct survey_refs_wanted {
int want_other; /* see FILTER_REFS_OTHERS -- refs/notes/, refs/stash/ */
};
-/*
- * The set of refs that we will search if the user doesn't select
- * any on the command line.
- */
-static struct survey_refs_wanted refs_if_unspecified = {
- .want_all_refs = 0,
-
- .want_branches = 1,
- .want_tags = 1,
- .want_remotes = 1,
- .want_detached = 0,
- .want_other = 0,
+static struct survey_refs_wanted default_ref_options = {
+ .want_all_refs = 1,
};
struct survey_opts {
@@ -37,19 +33,51 @@ struct survey_opts {
struct survey_refs_wanted refs;
};
-static struct survey_opts survey_opts = {
- .verbose = 0,
- .show_progress = -1, /* defaults to isatty(2) */
+struct survey_report_ref_summary {
+ size_t refs_nr;
+ size_t branches_nr;
+ size_t remote_refs_nr;
+ size_t tags_nr;
+ size_t tags_annotated_nr;
+ size_t others_nr;
+ size_t unknown_nr;
+};
+
+/**
+ * This struct contains all of the information that needs to be printed
+ * at the end of the exploration of the repository and its references.
+ */
+struct survey_report {
+ struct survey_report_ref_summary refs;
+};
+
+struct survey_context {
+ /* Options that control what is done. */
+ struct survey_opts opts;
+
+ /* Info for output only. */
+ struct survey_report report;
- .refs.want_all_refs = -1,
+ /*
+ * The rest of the members are about enabling the activity
+ * of the 'git survey' command, including ref listings, object
+ * pointers, and progress.
+ */
+
+ struct repository *repo;
+
+ struct progress *progress;
+ size_t progress_nr;
+ size_t progress_total;
- .refs.want_branches = -1, /* default these to undefined */
- .refs.want_tags = -1,
- .refs.want_remotes = -1,
- .refs.want_detached = -1,
- .refs.want_other = -1,
+ struct strvec refs;
};
+static void clear_survey_context(struct survey_context *ctx)
+{
+ strvec_clear(&ctx->refs);
+}
+
/*
* After parsing the command line arguments, figure out which refs we
* should scan.
@@ -57,9 +85,9 @@ static struct survey_opts survey_opts = {
* If ANY were given in positive sense, then we ONLY include them and
* do not use the builtin values.
*/
-static void fixup_refs_wanted(void)
+static void fixup_refs_wanted(struct survey_context *ctx)
{
- struct survey_refs_wanted *rw = &survey_opts.refs;
+ struct survey_refs_wanted *rw = &ctx->opts.refs;
/*
* `--all-refs` overrides and enables everything.
@@ -82,7 +110,7 @@ static void fixup_refs_wanted(void)
rw->want_remotes == -1 &&
rw->want_detached == -1 &&
rw->want_other == -1) {
- *rw = refs_if_unspecified;
+ *rw = default_ref_options;
return;
}
@@ -106,54 +134,184 @@ static void fixup_refs_wanted(void)
rw->want_other = 0;
}
-static struct option survey_options[] = {
- OPT__VERBOSE(&survey_opts.verbose, N_("verbose output")),
- OPT_BOOL(0, "progress", &survey_opts.show_progress, N_("show progress")),
-
- OPT_BOOL_F(0, "all-refs", &survey_opts.refs.want_all_refs, N_("include all refs"), PARSE_OPT_NONEG),
-
- OPT_BOOL_F(0, "branches", &survey_opts.refs.want_branches, N_("include branches"), PARSE_OPT_NONEG),
- OPT_BOOL_F(0, "tags", &survey_opts.refs.want_tags, N_("include tags"), PARSE_OPT_NONEG),
- OPT_BOOL_F(0, "remotes", &survey_opts.refs.want_remotes, N_("include all remotes refs"), PARSE_OPT_NONEG),
- OPT_BOOL_F(0, "detached", &survey_opts.refs.want_detached, N_("include detached HEAD"), PARSE_OPT_NONEG),
- OPT_BOOL_F(0, "other", &survey_opts.refs.want_other, N_("include notes and stashes"), PARSE_OPT_NONEG),
-
- OPT_END(),
-};
-
static int survey_load_config_cb(const char *var, const char *value,
- const struct config_context *ctx, void *pvoid)
+ const struct config_context *cctx, void *pvoid)
{
+ struct survey_context *sctx = pvoid;
if (!strcmp(var, "survey.verbose")) {
- survey_opts.verbose = git_config_bool(var, value);
+ sctx->opts.verbose = git_config_bool(var, value);
return 0;
}
if (!strcmp(var, "survey.progress")) {
- survey_opts.show_progress = git_config_bool(var, value);
+ sctx->opts.show_progress = git_config_bool(var, value);
return 0;
}
- return git_default_config(var, value, ctx, pvoid);
+ return git_default_config(var, value, cctx, pvoid);
}
-static void survey_load_config(void)
+static void survey_load_config(struct survey_context *ctx)
{
- git_config(survey_load_config_cb, NULL);
+ git_config(survey_load_config_cb, ctx);
+}
+
+static void do_load_refs(struct survey_context *ctx,
+ struct ref_array *ref_array)
+{
+ struct ref_filter filter = REF_FILTER_INIT;
+ struct ref_sorting *sorting;
+ struct string_list sorting_options = STRING_LIST_INIT_DUP;
+
+ string_list_append(&sorting_options, "objectname");
+ sorting = ref_sorting_options(&sorting_options);
+
+ if (ctx->opts.refs.want_detached)
+ strvec_push(&ctx->refs, "HEAD");
+
+ if (ctx->opts.refs.want_all_refs) {
+ strvec_push(&ctx->refs, "refs/");
+ } else {
+ if (ctx->opts.refs.want_branches)
+ strvec_push(&ctx->refs, "refs/heads/");
+ if (ctx->opts.refs.want_tags)
+ strvec_push(&ctx->refs, "refs/tags/");
+ if (ctx->opts.refs.want_remotes)
+ strvec_push(&ctx->refs, "refs/remotes/");
+ if (ctx->opts.refs.want_other) {
+ strvec_push(&ctx->refs, "refs/notes/");
+ strvec_push(&ctx->refs, "refs/stash/");
+ }
+ }
+
+ filter.name_patterns = ctx->refs.v;
+ filter.ignore_case = 0;
+ filter.match_as_path = 1;
+
+ if (ctx->opts.show_progress) {
+ ctx->progress_total = 0;
+ ctx->progress = start_progress(_("Scanning refs..."), 0);
+ }
+
+ filter_refs(ref_array, &filter, FILTER_REFS_KIND_MASK);
+
+ if (ctx->opts.show_progress) {
+ ctx->progress_total = ref_array->nr;
+ display_progress(ctx->progress, ctx->progress_total);
+ }
+
+ ref_array_sort(sorting, ref_array);
+
+ stop_progress(&ctx->progress);
+ ref_filter_clear(&filter);
+ ref_sorting_release(sorting);
+}
+
+/*
+ * The REFS phase:
+ *
+ * Load the set of requested refs and assess them for scalability problems.
+ * Use that set to start a treewalk to all reachable objects and assess
+ * them.
+ *
+ * This data will give us insights into the repository itself (the number
+ * of refs, the size and shape of the DAG, the number and size of the
+ * objects).
+ *
+ * Theoretically, this data is independent of the on-disk representation
+ * (e.g. independent of packing concerns).
+ */
+static void survey_phase_refs(struct survey_context *ctx)
+{
+ struct ref_array ref_array = { 0 };
+
+ trace2_region_enter("survey", "phase/refs", ctx->repo);
+ do_load_refs(ctx, &ref_array);
+
+ ctx->report.refs.refs_nr = ref_array.nr;
+ for (size_t i = 0; i < ref_array.nr; i++) {
+ size_t size;
+ struct ref_array_item *item = ref_array.items[i];
+
+ switch (item->kind) {
+ case FILTER_REFS_TAGS:
+ ctx->report.refs.tags_nr++;
+ if (oid_object_info(ctx->repo,
+ &item->objectname,
+ &size) == OBJ_TAG)
+ ctx->report.refs.tags_annotated_nr++;
+ break;
+
+ case FILTER_REFS_BRANCHES:
+ ctx->report.refs.branches_nr++;
+ break;
+
+ case FILTER_REFS_REMOTES:
+ ctx->report.refs.remote_refs_nr++;
+ break;
+
+ case FILTER_REFS_OTHERS:
+ ctx->report.refs.others_nr++;
+ break;
+
+ default:
+ ctx->report.refs.unknown_nr++;
+ break;
+ }
+ }
+
+ trace2_region_leave("survey", "phase/refs", ctx->repo);
+
+ ref_array_clear(&ref_array);
}
int cmd_survey(int argc, const char **argv, const char *prefix)
{
+ static struct survey_context ctx = {
+ .opts = {
+ .verbose = 0,
+ .show_progress = -1, /* defaults to isatty(2) */
+
+ .refs.want_all_refs = -1,
+
+ .refs.want_branches = -1, /* default these to undefined */
+ .refs.want_tags = -1,
+ .refs.want_remotes = -1,
+ .refs.want_detached = -1,
+ .refs.want_other = -1,
+ },
+ .refs = STRVEC_INIT,
+ };
+
+ static struct option survey_options[] = {
+ OPT__VERBOSE(&ctx.opts.verbose, N_("verbose output")),
+ OPT_BOOL(0, "progress", &ctx.opts.show_progress, N_("show progress")),
+
+ OPT_BOOL_F(0, "all-refs", &ctx.opts.refs.want_all_refs, N_("include all refs"), PARSE_OPT_NONEG),
+
+ OPT_BOOL_F(0, "branches", &ctx.opts.refs.want_branches, N_("include branches"), PARSE_OPT_NONEG),
+ OPT_BOOL_F(0, "tags", &ctx.opts.refs.want_tags, N_("include tags"), PARSE_OPT_NONEG),
+ OPT_BOOL_F(0, "remotes", &ctx.opts.refs.want_remotes, N_("include all remotes refs"), PARSE_OPT_NONEG),
+ OPT_BOOL_F(0, "detached", &ctx.opts.refs.want_detached, N_("include detached HEAD"), PARSE_OPT_NONEG),
+ OPT_BOOL_F(0, "other", &ctx.opts.refs.want_other, N_("include notes and stashes"), PARSE_OPT_NONEG),
+
+ OPT_END(),
+ };
+
if (argc == 2 && !strcmp(argv[1], "-h"))
usage_with_options(survey_usage, survey_options);
- prepare_repo_settings(the_repository);
- survey_load_config();
+ ctx.repo = the_repository;
+ prepare_repo_settings(ctx.repo);
+ survey_load_config(&ctx);
argc = parse_options(argc, argv, prefix, survey_options, survey_usage, 0);
- if (survey_opts.show_progress < 0)
- survey_opts.show_progress = isatty(2);
- fixup_refs_wanted();
+ if (ctx.opts.show_progress < 0)
+ ctx.opts.show_progress = isatty(2);
+ fixup_refs_wanted(&ctx);
+
+ survey_phase_refs(&ctx);
+ clear_survey_context(&ctx);
return 0;
}
diff --git a/t/t8100-git-survey.sh b/t/t8100-git-survey.sh
index 2df7fa83629..5903c90cb57 100755
--- a/t/t8100-git-survey.sh
+++ b/t/t8100-git-survey.sh
@@ -15,4 +15,13 @@ test_expect_success 'git survey -h shows experimental warning' '
grep "EXPERIMENTAL!" usage
'
+test_expect_success 'creat a semi-interesting repo' '
+ test_commit_bulk 10
+'
+
+test_expect_success 'git survey (default)' '
+ git survey >out 2>err &&
+ test_line_count = 0 err
+'
+
test_done
--
gitgitgadget
next prev parent reply other threads:[~2024-09-10 2:29 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-10 2:28 [PATCH 00/30] [RFC] Path-walk API and applications Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 01/30] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 02/30] backfill: add builtin boilerplate Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 03/30] backfill: basic functionality and tests Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 04/30] backfill: add --batch-size=<n> option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 05/30] backfill: add --sparse option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 06/30] backfill: assume --sparse when sparse-checkout is enabled Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 07/30] path-walk: allow consumer to specify object types Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 08/30] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 09/30] survey: stub in new experimental `git-survey` command Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 10/30] survey: add command line opts to select references Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` Jeff Hostetler via GitGitGadget [this message]
2024-09-10 2:28 ` [PATCH 12/30] survey: start pretty printing data in table form Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 13/30] survey: add object count summary Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 14/30] survey: summarize total sizes by object type Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 15/30] survey: show progress during object walk Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 16/30] survey: add ability to track prioritized lists Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 17/30] survey: add report of "largest" paths Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 18/30] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 19/30] path-walk: add prune_all_uninteresting option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 20/30] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 21/30] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 22/30] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 23/30] p5313: add size comparison test Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 24/30] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 25/30] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 26/30] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 27/30] pack-objects: add --full-name-hash option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 28/30] test-name-hash: add helper to compute name-hash functions Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 29/30] p5314: add a size test for name-hash collisions Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 30/30] pack-objects: output debug info about deltas Derrick Stolee via GitGitGadget
2024-09-11 21:32 ` [PATCH 00/30] [RFC] Path-walk API and applications Junio C Hamano
2024-09-17 10:41 ` Christian Couder
2024-09-18 23:18 ` Derrick Stolee
2024-09-22 18:37 ` Junio C Hamano
2024-09-23 1:22 ` Derrick Stolee
2024-09-23 16:56 ` Junio C Hamano
2024-09-22 21:08 ` Kristoffer Haugsbakk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=efa1793a5729b152b8961238dd834a26275e969a.1725935335.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=jeffhostetler@github.com \
--cc=johannes.schindelin@gmx.de \
--cc=johncai86@gmail.com \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
--cc=peff@peff.net \
--cc=ps@pks.im \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).