git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Justin Tobler <jltobler@gmail.com>
To: git@vger.kernel.org
Cc: ps@pks.im, karthik.188@gmail.com,
	Justin Tobler <jltobler@gmail.com>,
	Derrick Stolee <stolee@gmail.com>
Subject: [PATCH v3 4/7] builtin/repo: introduce stats subcommand
Date: Thu, 25 Sep 2025 18:29:25 -0500	[thread overview]
Message-ID: <20250925232928.3846-5-jltobler@gmail.com> (raw)
In-Reply-To: <20250925232928.3846-1-jltobler@gmail.com>

The shape of a repository's history can have huge impacts on the
performance and health of the repository itself. Currently, Git lacks a
means to surface key stats/information regarding the shape of a
repository via a single command. Acquiring this information requires
users to be fairly knowledgeable about the structure of a Git repository
and how to identify the relevant data points. To fill this gap,
supplemental tools such as git-sizer(1) have been developed.

To allow users to more readily identify potential issues for a
repository, introduce the "stats" subcommand in git-repo(1) to output
stats for the repository that may be of interest to users. The goal of
this subcommand is to eventually provide similar functionality to
git-sizer(1), but natively in Git.

The initial version of this command only iterates through all references
in the repository and tracks the count of branches, tags, remote refs,
and other reference types. The corresponding information is displayed in
a human-friendly table formatted in a very similar manner to
git-sizer(1). The width of each table column is adjusted automatically
to satisfy the requirements of the widest row contained.

Subsequent commits will surface additional relevant data points to
output.

Based-on-patch-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
 Documentation/git-repo.adoc |  10 ++
 builtin/repo.c              | 196 ++++++++++++++++++++++++++++++++++++
 t/meson.build               |   1 +
 t/t1901-repo-stats.sh       |  61 +++++++++++
 4 files changed, 268 insertions(+)
 create mode 100755 t/t1901-repo-stats.sh

diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc
index 209afd1b61..a009bf8cf1 100644
--- a/Documentation/git-repo.adoc
+++ b/Documentation/git-repo.adoc
@@ -9,6 +9,7 @@ SYNOPSIS
 --------
 [synopsis]
 git repo info [--format=(keyvalue|nul)] [-z] [<key>...]
+git repo stats
 
 DESCRIPTION
 -----------
@@ -43,6 +44,15 @@ supported:
 +
 `-z` is an alias for `--format=nul`.
 
+`stats`::
+	Retrieve statistics about the current repository. The following kinds
+	of information are reported:
++
+* Reference counts categorized by type
+
++
+The table output format may change and is not intended for machine parsing.
+
 INFO KEYS
 ---------
 In order to obtain a set of values from `git repo info`, you should provide
diff --git a/builtin/repo.c b/builtin/repo.c
index eeeab8fbd2..0b7dd636e5 100644
--- a/builtin/repo.c
+++ b/builtin/repo.c
@@ -4,12 +4,15 @@
 #include "environment.h"
 #include "parse-options.h"
 #include "quote.h"
+#include "ref-filter.h"
 #include "refs.h"
 #include "strbuf.h"
+#include "string-list.h"
 #include "shallow.h"
 
 static const char *const repo_usage[] = {
 	"git repo info [--format=(keyvalue|nul)] [-z] [<key>...]",
+	"git repo stats",
 	NULL
 };
 
@@ -156,12 +159,205 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix,
 	return print_fields(argc, argv, repo, format);
 }
 
+struct ref_stats {
+	size_t branches;
+	size_t remotes;
+	size_t tags;
+	size_t others;
+};
+
+struct stats_table {
+	struct string_list rows;
+
+	size_t name_col_width;
+	size_t value_col_width;
+};
+
+/*
+ * Value column text for one row; attached to the row's list item as `util`.
+ */
+struct stats_table_entry {
+	char *value;
+};
+
+static void stats_table_vaddf(struct stats_table *table,
+			      struct stats_table_entry *entry,
+			      const char *format, va_list ap)
+{
+	struct strbuf buf = STRBUF_INIT;
+	struct string_list_item *item;
+	char *formatted_name;
+	size_t name_width;
+
+	strbuf_vaddf(&buf, format, ap);
+	formatted_name = strbuf_detach(&buf, &name_width);
+
+	item = string_list_append_nodup(&table->rows, formatted_name);
+	item->util = entry;
+
+	if (name_width > table->name_col_width)
+		table->name_col_width = name_width;
+	if (entry) {
+		size_t value_width = strlen(entry->value);
+		if (value_width > table->value_col_width)
+			table->value_col_width = value_width;
+	}
+}
+
+static void stats_table_addf(struct stats_table *table, const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	stats_table_vaddf(table, NULL, format, ap);
+	va_end(ap);
+}
+
+static void stats_table_count_addf(struct stats_table *table, size_t value,
+				   const char *format, ...)
+{
+	struct stats_table_entry *entry;
+	va_list ap;
+
+	CALLOC_ARRAY(entry, 1);
+	entry->value = xstrfmt("%" PRIuMAX, (uintmax_t)value);
+
+	va_start(ap, format);
+	stats_table_vaddf(table, entry, format, ap);
+	va_end(ap);
+}
+
+static void stats_table_setup(struct stats_table *table, struct ref_stats *refs)
+{
+	size_t ref_total;
+
+	ref_total = refs->branches + refs->remotes + refs->tags + refs->others;
+	stats_table_addf(table, "* %s", _("References"));
+	stats_table_count_addf(table, ref_total, "  * %s", _("Count"));
+	stats_table_count_addf(table, refs->branches, "    * %s", _("Branches"));
+	stats_table_count_addf(table, refs->tags, "    * %s", _("Tags"));
+	stats_table_count_addf(table, refs->remotes, "    * %s", _("Remotes"));
+	stats_table_count_addf(table, refs->others, "    * %s", _("Others"));
+}
+
+static inline size_t max_size_t(size_t a, size_t b)
+{
+	return (a > b) ? a : b;
+}
+
+/* Print the title row, a dashed separator, then one line per table row. */
+static void stats_table_print(const struct stats_table *table)
+{
+	const char *name_col_title = _("Repository stats");
+	const char *value_col_title = _("Value");
+	size_t name_title_len = strlen(name_col_title);
+	size_t value_title_len = strlen(value_col_title);
+	struct string_list_item *item;
+	int name_col_width;
+	int value_col_width;
+
+	name_col_width = cast_size_t_to_int(
+		max_size_t(table->name_col_width, name_title_len));
+	value_col_width = cast_size_t_to_int(
+		max_size_t(table->value_col_width, value_title_len));
+
+	printf("| %-*s | %-*s |\n", name_col_width, name_col_title,
+	       value_col_width, value_col_title);
+	printf("| ");
+	for (int i = 0; i < name_col_width; i++)
+		putchar('-');
+	printf(" | ");
+	for (int i = 0; i < value_col_width; i++)
+		putchar('-');
+	printf(" |\n");
+
+	for_each_string_list_item(item, &table->rows) {
+		struct stats_table_entry *entry = item->util;
+		const char *value = "";
+
+		/* Rows added without an entry keep an empty value cell. */
+		if (entry)
+			value = entry->value;
+
+		printf("| %-*s | %*s |\n", name_col_width, item->string,
+		       value_col_width, value);
+	}
+}
+
+static void stats_table_clear(struct stats_table *table)
+{
+	struct stats_table_entry *entry;
+	struct string_list_item *item;
+
+	for_each_string_list_item(item, &table->rows) {
+		entry = item->util;
+		if (entry)
+			free(entry->value);
+	}
+
+	string_list_clear(&table->rows, 1);
+}
+
+static void stats_count_references(struct ref_stats *stats, struct ref_array *refs)
+{
+	for (int i = 0; i < refs->nr; i++) {
+		struct ref_array_item *ref = refs->items[i];
+
+		switch (ref->kind) {
+		case FILTER_REFS_BRANCHES:
+			stats->branches++;
+			break;
+		case FILTER_REFS_REMOTES:
+			stats->remotes++;
+			break;
+		case FILTER_REFS_TAGS:
+			stats->tags++;
+			break;
+		case FILTER_REFS_OTHERS:
+			stats->others++;
+			break;
+		default:
+			BUG("unexpected reference type");
+		}
+	}
+}
+
+static int cmd_repo_stats(int argc, const char **argv, const char *prefix,
+			  struct repository *repo UNUSED)
+{
+	struct ref_filter filter = REF_FILTER_INIT;
+	struct stats_table table = {
+		.rows = STRING_LIST_INIT_DUP,
+	};
+	struct ref_stats stats = { 0 };
+	struct ref_array refs = { 0 };
+	struct option options[] = { 0 };
+
+	argc = parse_options(argc, argv, prefix, options, repo_usage, 0);
+	if (argc)
+		usage(_("too many arguments"));
+
+	if (filter_refs(&refs, &filter, FILTER_REFS_REGULAR))
+		die(_("unable to filter refs"));
+
+	stats_count_references(&stats, &refs);
+
+	stats_table_setup(&table, &stats);
+	stats_table_print(&table);
+
+	stats_table_clear(&table);
+	ref_array_clear(&refs);
+
+	return 0;
+}
+
 int cmd_repo(int argc, const char **argv, const char *prefix,
 	     struct repository *repo)
 {
 	parse_opt_subcommand_fn *fn = NULL;
 	struct option options[] = {
 		OPT_SUBCOMMAND("info", &fn, cmd_repo_info),
+		OPT_SUBCOMMAND("stats", &fn, cmd_repo_stats),
 		OPT_END()
 	};
 
diff --git a/t/meson.build b/t/meson.build
index 7974795fe4..071d4a5112 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -236,6 +236,7 @@ integration_tests = [
   't1701-racy-split-index.sh',
   't1800-hook.sh',
   't1900-repo.sh',
+  't1901-repo-stats.sh',
   't2000-conflict-when-checking-files-out.sh',
   't2002-checkout-cache-u.sh',
   't2003-checkout-cache-mkdir.sh',
diff --git a/t/t1901-repo-stats.sh b/t/t1901-repo-stats.sh
new file mode 100755
index 0000000000..535ac511dd
--- /dev/null
+++ b/t/t1901-repo-stats.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+test_description='test git repo stats'
+
+. ./test-lib.sh
+
+test_expect_success 'empty repository' '
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	(
+		cd repo &&
+		cat >expect <<-\EOF &&
+		| Repository stats | Value |
+		| ---------------- | ----- |
+		| * References     |       |
+		|   * Count        |     0 |
+		|     * Branches   |     0 |
+		|     * Tags       |     0 |
+		|     * Remotes    |     0 |
+		|     * Others     |     0 |
+		EOF
+
+		git repo stats >out 2>err &&
+
+		test_cmp expect out &&
+		test_line_count = 0 err
+	)
+'
+
+test_expect_success 'repository with references' '
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	(
+		cd repo &&
+		git commit --allow-empty -m init &&
+		git tag -a foo -m bar &&
+
+		oid="$(git rev-parse HEAD)" &&
+		git update-ref refs/remotes/origin/foo "$oid" &&
+
+		git notes add -m foo &&
+
+		cat >expect <<-\EOF &&
+		| Repository stats | Value |
+		| ---------------- | ----- |
+		| * References     |       |
+		|   * Count        |     4 |
+		|     * Branches   |     1 |
+		|     * Tags       |     1 |
+		|     * Remotes    |     1 |
+		|     * Others     |     1 |
+		EOF
+
+		git repo stats >out 2>err &&
+
+		test_cmp expect out &&
+		test_line_count = 0 err
+	)
+'
+
+test_done
-- 
2.51.0.193.g4975ec3473b


  parent reply	other threads:[~2025-09-25 23:29 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-23  2:56 [PATCH 0/4] builtin/repo: introduce stats subcommand Justin Tobler
2025-09-23  2:56 ` [PATCH 1/4] " Justin Tobler
2025-09-23 10:52   ` Patrick Steinhardt
2025-09-23 15:10     ` Justin Tobler
2025-09-23 15:26       ` Patrick Steinhardt
2025-09-23 15:22   ` Karthik Nayak
2025-09-23 15:55     ` Justin Tobler
2025-09-23  2:56 ` [PATCH 2/4] builtin/repo: add object counts in stats output Justin Tobler
2025-09-23 10:52   ` Patrick Steinhardt
2025-09-23 15:19     ` Justin Tobler
2025-09-23 15:30   ` Karthik Nayak
2025-09-23 15:56     ` Justin Tobler
2025-09-23  2:56 ` [PATCH 3/4] builtin/repo: add keyvalue format for stats Justin Tobler
2025-09-23 10:53   ` Patrick Steinhardt
2025-09-23 15:26     ` Justin Tobler
2025-09-23 15:39   ` Karthik Nayak
2025-09-23 15:59     ` Justin Tobler
2025-09-23  2:57 ` [PATCH 4/4] builtin/repo: add nul " Justin Tobler
2025-09-23 10:53   ` Patrick Steinhardt
2025-09-23 15:33     ` Justin Tobler
2025-09-24  4:48       ` Patrick Steinhardt
2025-09-23 15:41   ` Karthik Nayak
2025-09-23 16:02     ` Justin Tobler
2025-09-24 21:24 ` [PATCH v2 0/6] builtin/repo: introduce stats subcommand Justin Tobler
2025-09-24 21:24   ` [PATCH v2 1/6] builtin/repo: rename repo_info() to cmd_repo_info() Justin Tobler
2025-09-24 21:24   ` [PATCH v2 2/6] ref-filter: allow NULL filter pattern Justin Tobler
2025-09-24 21:24   ` [PATCH v2 3/6] builtin/repo: introduce stats subcommand Justin Tobler
2025-09-25  5:38     ` Patrick Steinhardt
2025-09-25 13:01       ` Justin Tobler
2025-09-24 21:24   ` [PATCH v2 4/6] builtin/repo: add object counts in stats output Justin Tobler
2025-09-24 21:24   ` [PATCH v2 5/6] builtin/repo: add keyvalue and nul format for stats Justin Tobler
2025-09-25  5:39     ` Patrick Steinhardt
2025-09-25 13:16       ` Justin Tobler
2025-09-25 13:58         ` Patrick Steinhardt
2025-09-24 21:24   ` [PATCH v2 6/6] builtin/repo: add progress meter " Justin Tobler
2025-09-25  5:39     ` Patrick Steinhardt
2025-09-25 13:20       ` Justin Tobler
2025-09-25 23:29   ` [PATCH v3 0/7] builtin/repo: introduce stats subcommand Justin Tobler
2025-09-25 23:29     ` [PATCH v3 1/7] builtin/repo: rename repo_info() to cmd_repo_info() Justin Tobler
2025-09-25 23:29     ` [PATCH v3 2/7] ref-filter: allow NULL filter pattern Justin Tobler
2025-09-25 23:29     ` [PATCH v3 3/7] clang-format: exclude control macros from SpaceBeforeParens Justin Tobler
2025-09-25 23:29     ` Justin Tobler [this message]
2025-09-25 23:51       ` [PATCH v3 4/7] builtin/repo: introduce stats subcommand Eric Sunshine
2025-09-26  1:38         ` Justin Tobler
2025-09-25 23:29     ` [PATCH v3 5/7] builtin/repo: add object counts in stats output Justin Tobler
2025-09-25 23:29     ` [PATCH v3 6/7] builtin/repo: add keyvalue and nul format for stats Justin Tobler
2025-09-25 23:29     ` [PATCH v3 7/7] builtin/repo: add progress meter " Justin Tobler
2025-09-27 14:50     ` [PATCH v4 0/7] builtin/repo: introduce stats subcommand Justin Tobler
2025-09-27 14:50       ` [PATCH v4 1/7] builtin/repo: rename repo_info() to cmd_repo_info() Justin Tobler
2025-09-27 14:50       ` [PATCH v4 2/7] ref-filter: allow NULL filter pattern Justin Tobler
2025-09-27 14:50       ` [PATCH v4 3/7] clang-format: exclude control macros from SpaceBeforeParens Justin Tobler
2025-09-27 15:40         ` Junio C Hamano
2025-09-27 15:51           ` Justin Tobler
2025-09-27 23:49             ` Junio C Hamano
2025-09-27 14:50       ` [PATCH v4 4/7] builtin/repo: introduce stats subcommand Justin Tobler
2025-09-27 16:32         ` Junio C Hamano
2025-10-09 22:09           ` Justin Tobler
2025-10-10  0:42             ` Justin Tobler
2025-10-10  6:53               ` Patrick Steinhardt
2025-10-10 14:34                 ` Justin Tobler
2025-10-13  6:13                   ` Patrick Steinhardt
2025-09-27 14:50       ` [PATCH v4 5/7] builtin/repo: add object counts in stats output Justin Tobler
2025-09-27 14:50       ` [PATCH v4 6/7] builtin/repo: add keyvalue and nul format for stats Justin Tobler
2025-09-27 14:50       ` [PATCH v4 7/7] builtin/repo: add progress meter " Justin Tobler
2025-09-27 16:33       ` [PATCH v4 0/7] builtin/repo: introduce stats subcommand Junio C Hamano
2025-10-15 21:12       ` [PATCH v5 0/6] builtin/repo: introduce structure subcommand Justin Tobler
2025-10-15 21:12         ` [PATCH v5 1/6] builtin/repo: rename repo_info() to cmd_repo_info() Justin Tobler
2025-10-15 21:12         ` [PATCH v5 2/6] ref-filter: allow NULL filter pattern Justin Tobler
2025-10-15 21:12         ` [PATCH v5 3/6] builtin/repo: introduce structure subcommand Justin Tobler
2025-10-16 10:58           ` Patrick Steinhardt
2025-10-21 16:04             ` Justin Tobler
2025-10-15 21:12         ` [PATCH v5 4/6] builtin/repo: add object counts in structure output Justin Tobler
2025-10-15 21:12         ` [PATCH v5 5/6] builtin/repo: add keyvalue and nul format for structure stats Justin Tobler
2025-10-15 21:12         ` [PATCH v5 6/6] builtin/repo: add progress meter " Justin Tobler
2025-10-21 18:25         ` [PATCH v6 0/7] builtin/repo: introduce structure subcommand Justin Tobler
2025-10-21 18:25           ` [PATCH v6 1/7] builtin/repo: rename repo_info() to cmd_repo_info() Justin Tobler
2025-10-21 18:25           ` [PATCH v6 2/7] ref-filter: allow NULL filter pattern Justin Tobler
2025-10-21 18:25           ` [PATCH v6 3/7] ref-filter: export ref_kind_from_refname() Justin Tobler
2025-10-21 18:25           ` [PATCH v6 4/7] builtin/repo: introduce structure subcommand Justin Tobler
2025-10-22  5:01             ` Patrick Steinhardt
2025-10-22 13:50               ` Justin Tobler
2025-10-22 20:15             ` Lucas Seiki Oshiro
2025-10-22 23:42               ` Justin Tobler
2025-10-21 18:25           ` [PATCH v6 5/7] builtin/repo: add object counts in structure output Justin Tobler
2025-10-21 18:26           ` [PATCH v6 6/7] builtin/repo: add keyvalue and nul format for structure stats Justin Tobler
2025-10-22 20:34             ` Lucas Seiki Oshiro
2025-10-23  0:03               ` Justin Tobler
2025-10-21 18:26           ` [PATCH v6 7/7] builtin/repo: add progress meter " Justin Tobler
2025-10-22 19:23           ` [PATCH v6 0/7] builtin/repo: introduce structure subcommand Lucas Seiki Oshiro
2025-10-23  0:05             ` Justin Tobler
2025-10-23 20:54           ` Junio C Hamano
2025-10-24  5:14             ` Patrick Steinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250925232928.3846-5-jltobler@gmail.com \
    --to=jltobler@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=karthik.188@gmail.com \
    --cc=ps@pks.im \
    --cc=stolee@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).