From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, johannes.schindelin@gmx.de, peff@peff.net,
ps@pks.im, me@ttaylorr.com, johncai86@gmail.com,
newren@gmail.com, Derrick Stolee <stolee@gmail.com>,
Derrick Stolee <stolee@gmail.com>
Subject: [PATCH 13/30] survey: add object count summary
Date: Tue, 10 Sep 2024 02:28:38 +0000 [thread overview]
Message-ID: <fcc281ac2bfabb6d19e6be40c41157612c5a3f83.1725935335.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1786.git.1725935335.gitgitgadget@gmail.com>
From: Derrick Stolee <stolee@gmail.com>
At the moment, nothing is obvious about the reason for the use of the
path-walk API, but this will become more prevalent in future iterations. For
now, use the path-walk API to sum up the counts of each kind of object.
For example, this is the reachable object summary output for my local repo:
REACHABLE OBJECT SUMMARY
========================
Object Type | Count
------------+-------
Tags | 0
Commits | 178573
Trees | 312745
Blobs | 183035
(Note: the "Tags" are zero right now because the path-walk API has not been
integrated to walk tags yet. This will be fixed in a later change.)
RFC TODO: make sure tags are walked before this change.
Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
builtin/survey.c | 196 ++++++++++++++++++++++++++++++++++++++++--
t/t8100-git-survey.sh | 26 ++++--
2 files changed, 209 insertions(+), 13 deletions(-)
diff --git a/builtin/survey.c b/builtin/survey.c
index b2104e84d61..504b4edafce 100644
--- a/builtin/survey.c
+++ b/builtin/survey.c
@@ -1,12 +1,19 @@
#include "builtin.h"
#include "config.h"
+#include "environment.h"
+#include "hex.h"
#include "object.h"
+#include "object-name.h"
#include "object-store-ll.h"
#include "parse-options.h"
+#include "path-walk.h"
#include "progress.h"
#include "ref-filter.h"
+#include "refs.h"
+#include "revision.h"
#include "strbuf.h"
#include "strvec.h"
+#include "tag.h"
#include "trace2.h"
static const char * const survey_usage[] = {
@@ -50,12 +57,20 @@ struct survey_report_ref_summary {
size_t unknown_nr;
};
+struct survey_report_object_summary {
+ size_t commits_nr;
+ size_t tags_nr;
+ size_t trees_nr;
+ size_t blobs_nr;
+};
+
/**
* This struct contains all of the information that needs to be printed
* at the end of the exploration of the repository and its references.
*/
struct survey_report {
struct survey_report_ref_summary refs;
+ struct survey_report_object_summary reachable_objects;
};
struct survey_context {
@@ -78,10 +93,12 @@ struct survey_context {
size_t progress_total;
struct strvec refs;
+ struct ref_array ref_array;
};
static void clear_survey_context(struct survey_context *ctx)
{
+ ref_array_clear(&ctx->ref_array);
strvec_clear(&ctx->refs);
}
@@ -125,10 +142,12 @@ static void print_table_title(const char *name, size_t *widths, size_t nr)
{
static struct strbuf lines = STRBUF_INIT;
size_t width = 0;
+ size_t min_width;
strbuf_setlen(&lines, 0);
- strbuf_addch(&lines, ' ');
+ strbuf_addch(&lines, '\n');
strbuf_addstr(&lines, name);
+ min_width = lines.len - 1;
strbuf_addch(&lines, '\n');
for (size_t i = 0; i < nr; i++) {
@@ -136,6 +155,10 @@ static void print_table_title(const char *name, size_t *widths, size_t nr)
width += 3;
width += widths[i];
}
+
+ if (width < min_width)
+ width = min_width;
+
strbuf_addchars(&lines, '=', width);
printf("%s\n", lines.buf);
}
@@ -228,11 +251,43 @@ static void survey_report_plaintext_refs(struct survey_context *ctx)
clear_table(&table);
}
+static void survey_report_plaintext_reachable_object_summary(struct survey_context *ctx)
+{
+ struct survey_report_object_summary *objs = &ctx->report.reachable_objects;
+ struct survey_table table = SURVEY_TABLE_INIT;
+ char *fmt;
+
+ table.table_name = _("REACHABLE OBJECT SUMMARY");
+
+ strvec_push(&table.header, _("Object Type"));
+ strvec_push(&table.header, _("Count"));
+
+ fmt = xstrfmt("%"PRIuMAX"", objs->tags_nr);
+ insert_table_rowv(&table, _("Tags"), fmt, NULL);
+ free(fmt);
+
+ fmt = xstrfmt("%"PRIuMAX"", objs->commits_nr);
+ insert_table_rowv(&table, _("Commits"), fmt, NULL);
+ free(fmt);
+
+ fmt = xstrfmt("%"PRIuMAX"", objs->trees_nr);
+ insert_table_rowv(&table, _("Trees"), fmt, NULL);
+ free(fmt);
+
+ fmt = xstrfmt("%"PRIuMAX"", objs->blobs_nr);
+ insert_table_rowv(&table, _("Blobs"), fmt, NULL);
+ free(fmt);
+
+ print_table_plaintext(&table);
+ clear_table(&table);
+}
+
static void survey_report_plaintext(struct survey_context *ctx)
{
printf("GIT SURVEY for \"%s\"\n", ctx->repo->worktree);
printf("-----------------------------------------------------\n");
survey_report_plaintext_refs(ctx);
+ survey_report_plaintext_reachable_object_summary(ctx);
}
static void survey_report_json(struct survey_context *ctx)
@@ -384,15 +439,13 @@ static void do_load_refs(struct survey_context *ctx,
*/
static void survey_phase_refs(struct survey_context *ctx)
{
- struct ref_array ref_array = { 0 };
-
trace2_region_enter("survey", "phase/refs", ctx->repo);
- do_load_refs(ctx, &ref_array);
+ do_load_refs(ctx, &ctx->ref_array);
- ctx->report.refs.refs_nr = ref_array.nr;
- for (size_t i = 0; i < ref_array.nr; i++) {
+ ctx->report.refs.refs_nr = ctx->ref_array.nr;
+ for (size_t i = 0; i < ctx->ref_array.nr; i++) {
size_t size;
- struct ref_array_item *item = ref_array.items[i];
+ struct ref_array_item *item = ctx->ref_array.items[i];
switch (item->kind) {
case FILTER_REFS_TAGS:
@@ -422,8 +475,133 @@ static void survey_phase_refs(struct survey_context *ctx)
}
trace2_region_leave("survey", "phase/refs", ctx->repo);
+}
+
+static void increment_object_counts(
+ struct survey_report_object_summary *summary,
+ enum object_type type,
+ size_t nr)
+{
+ switch (type) {
+ case OBJ_COMMIT:
+ summary->commits_nr += nr;
+ break;
+
+ case OBJ_TREE:
+ summary->trees_nr += nr;
+ break;
+
+ case OBJ_BLOB:
+ summary->blobs_nr += nr;
+ break;
+
+ default:
+ break;
+ }
+}
+
+static int survey_objects_path_walk_fn(const char *path,
+ struct oid_array *oids,
+ enum object_type type,
+ void *data)
+{
+ struct survey_context *ctx = data;
+
+ increment_object_counts(&ctx->report.reachable_objects,
+ type, oids->nr);
+
+ return 0;
+}
+
+static int iterate_tag_chain(struct survey_context *ctx,
+ struct object_id *oid,
+ struct object_id *peeled)
+{
+ struct object *o = lookup_unknown_object(ctx->repo, oid);
+ struct tag *t;
+
+ if (o->type != OBJ_TAG) {
+ oidcpy(peeled, &o->oid);
+ return o->type != OBJ_COMMIT;
+ }
+
+ t = lookup_tag(ctx->repo, oid);
+ while (t) {
+ parse_tag(t);
+ ctx->report.reachable_objects.tags_nr++;
+
+ if (!t->tagged)
+ break;
+
+ o = lookup_unknown_object(ctx->repo, &t->tagged->oid);
+ if (o && o->type == OBJ_TAG)
+ t = lookup_tag(ctx->repo, &t->tagged->oid);
+ else
+ break;
+ }
+
+ if (!t || !t->tagged)
+ return -1;
- ref_array_clear(&ref_array);
+ oidcpy(peeled, &t->tagged->oid);
+ o = lookup_unknown_object(ctx->repo, peeled);
+ if (o && o->type == OBJ_COMMIT)
+ return 0;
+ return -1;
+}
+
+static void survey_phase_objects(struct survey_context *ctx)
+{
+ struct rev_info revs = REV_INFO_INIT;
+ struct path_walk_info info = PATH_WALK_INFO_INIT;
+ unsigned int add_flags = 0;
+
+ trace2_region_enter("survey", "phase/objects", ctx->repo);
+
+ info.revs = &revs;
+ info.path_fn = survey_objects_path_walk_fn;
+ info.path_fn_data = ctx;
+
+ info.commits = 1;
+ info.trees = 1;
+ info.blobs = 1;
+ info.tags = 1;
+
+ repo_init_revisions(ctx->repo, &revs, "");
+
+ for (size_t i = 0; i < ctx->ref_array.nr; i++) {
+ struct ref_array_item *item = ctx->ref_array.items[i];
+ struct object_id peeled;
+
+ switch (item->kind) {
+ case FILTER_REFS_TAGS:
+ if (!iterate_tag_chain(ctx, &item->objectname, &peeled))
+ add_pending_oid(&revs, NULL, &peeled, add_flags);
+ break;
+ case FILTER_REFS_BRANCHES:
+ add_pending_oid(&revs, NULL, &item->objectname, add_flags);
+ break;
+ case FILTER_REFS_REMOTES:
+ add_pending_oid(&revs, NULL, &item->objectname, add_flags);
+ break;
+ case FILTER_REFS_OTHERS:
+ /*
+ * This may be a note, stash, or custom namespace branch.
+ */
+ add_pending_oid(&revs, NULL, &item->objectname, add_flags);
+ break;
+ case FILTER_REFS_DETACHED_HEAD:
+ add_pending_oid(&revs, NULL, &item->objectname, add_flags);
+ break;
+ default:
+ break;
+ }
+ }
+
+ walk_objects_by_path(&info);
+
+ release_revisions(&revs);
+ trace2_region_leave("survey", "phase/objects", ctx->repo);
}
int cmd_survey(int argc, const char **argv, const char *prefix)
@@ -474,6 +652,8 @@ int cmd_survey(int argc, const char **argv, const char *prefix)
survey_phase_refs(&ctx);
+ survey_phase_objects(&ctx);
+
switch (ctx.opts.format) {
case SURVEY_PLAINTEXT:
survey_report_plaintext(&ctx);
diff --git a/t/t8100-git-survey.sh b/t/t8100-git-survey.sh
index a57f6ca7a59..0da92eafa95 100755
--- a/t/t8100-git-survey.sh
+++ b/t/t8100-git-survey.sh
@@ -16,24 +16,40 @@ test_expect_success 'git survey -h shows experimental warning' '
'
test_expect_success 'creat a semi-interesting repo' '
- test_commit_bulk 10
+ test_commit_bulk 10 &&
+ git tag -a -m one one HEAD~5 &&
+ git tag -a -m two two HEAD~3 &&
+ git tag -a -m three three two &&
+ git tag -a -m four four three &&
+ git update-ref -d refs/tags/three &&
+ git update-ref -d refs/tags/two
'
test_expect_success 'git survey (default)' '
- git survey >out 2>err &&
+ git survey --all-refs >out 2>err &&
test_line_count = 0 err &&
cat >expect <<-EOF &&
GIT SURVEY for "$(pwd)"
-----------------------------------------------------
- REFERENCES SUMMARY
+
+ REFERENCES SUMMARY
========================
Ref Type | Count
-----------------+------
Branches | 1
Remote refs | 0
- Tags (all) | 0
- Tags (annotated) | 0
+ Tags (all) | 2
+ Tags (annotated) | 2
+
+ REACHABLE OBJECT SUMMARY
+ ========================
+ Object Type | Count
+ ------------+------
+ Tags | 0
+ Commits | 10
+ Trees | 10
+ Blobs | 10
EOF
test_cmp expect out
--
gitgitgadget
next prev parent reply other threads:[~2024-09-10 2:29 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-10 2:28 [PATCH 00/30] [RFC] Path-walk API and applications Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 01/30] path-walk: introduce an object walk by path Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 02/30] backfill: add builtin boilerplate Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 03/30] backfill: basic functionality and tests Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 04/30] backfill: add --batch-size=<n> option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 05/30] backfill: add --sparse option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 06/30] backfill: assume --sparse when sparse-checkout is enabled Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 07/30] path-walk: allow consumer to specify object types Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 08/30] path-walk: allow visiting tags Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 09/30] survey: stub in new experimental `git-survey` command Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 10/30] survey: add command line opts to select references Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 11/30] survey: collect the set of requested refs Jeff Hostetler via GitGitGadget
2024-09-10 2:28 ` [PATCH 12/30] survey: start pretty printing data in table form Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` Derrick Stolee via GitGitGadget [this message]
2024-09-10 2:28 ` [PATCH 14/30] survey: summarize total sizes by object type Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 15/30] survey: show progress during object walk Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 16/30] survey: add ability to track prioritized lists Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 17/30] survey: add report of "largest" paths Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 18/30] revision: create mark_trees_uninteresting_dense() Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 19/30] path-walk: add prune_all_uninteresting option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 20/30] pack-objects: add --path-walk option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 21/30] pack-objects: extract should_attempt_deltas() Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 22/30] pack-objects: introduce GIT_TEST_PACK_PATH_WALK Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 23/30] p5313: add size comparison test Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 24/30] repack: add --path-walk option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 25/30] pack-objects: enable --path-walk via config Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 26/30] scalar: enable path-walk during push " Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 27/30] pack-objects: add --full-name-hash option Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 28/30] test-name-hash: add helper to compute name-hash functions Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 29/30] p5314: add a size test for name-hash collisions Derrick Stolee via GitGitGadget
2024-09-10 2:28 ` [PATCH 30/30] pack-objects: output debug info about deltas Derrick Stolee via GitGitGadget
2024-09-11 21:32 ` [PATCH 00/30] [RFC] Path-walk API and applications Junio C Hamano
2024-09-17 10:41 ` Christian Couder
2024-09-18 23:18 ` Derrick Stolee
2024-09-22 18:37 ` Junio C Hamano
2024-09-23 1:22 ` Derrick Stolee
2024-09-23 16:56 ` Junio C Hamano
2024-09-22 21:08 ` Kristoffer Haugsbakk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=fcc281ac2bfabb6d19e6be40c41157612c5a3f83.1725935335.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=johannes.schindelin@gmx.de \
--cc=johncai86@gmail.com \
--cc=me@ttaylorr.com \
--cc=newren@gmail.com \
--cc=peff@peff.net \
--cc=ps@pks.im \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).