From: Justin Tobler <jltobler@gmail.com>
To: git@vger.kernel.org
Cc: ps@pks.im, gitster@pobox.com, kristofferhaugsbakk@fastmail.com,
eslam.reda.div@gmail.com, Justin Tobler <jltobler@gmail.com>
Subject: [PATCH v2 2/5] builtin/repo: collect largest inflated objects
Date: Mon, 23 Feb 2026 11:41:17 -0600 [thread overview]
Message-ID: <20260223174120.2356504-3-jltobler@gmail.com> (raw)
In-Reply-To: <20260223174120.2356504-1-jltobler@gmail.com>
The "structure" output for git-repo(1) shows the total inflated and disk
sizes of reachable objects in the repository, but doesn't show the size
of the largest individual objects. Since an individual object may be a
large contributor to the overall repository size, it is useful for users
to know the maximum size of individual objects.
While iterating across objects, record the size and OID of the largest
objects encountered for each object type to provide as output. Note that
the default "table" output format only displays size information and not
the corresponding OID. In a subsequent commit, the table format is
updated to add table annotations that mention the OID.
Signed-off-by: Justin Tobler <jltobler@gmail.com>
---
Documentation/git-repo.adoc | 1 +
builtin/repo.c | 63 +++++++++++++++++++++++++++++++++++++
t/t1901-repo-structure.sh | 28 +++++++++++++++++
3 files changed, 92 insertions(+)
diff --git a/Documentation/git-repo.adoc b/Documentation/git-repo.adoc
index 7d70270dfa..e812e59158 100644
--- a/Documentation/git-repo.adoc
+++ b/Documentation/git-repo.adoc
@@ -52,6 +52,7 @@ supported:
* Reachable object counts categorized by type
* Total inflated size of reachable objects by type
* Total disk size of reachable objects by type
+* Largest reachable objects in the repository by type
+
The output format can be chosen through the flag `--format`. Three formats are
supported:
diff --git a/builtin/repo.c b/builtin/repo.c
index c7c9f0f497..51a4359685 100644
--- a/builtin/repo.c
+++ b/builtin/repo.c
@@ -2,6 +2,7 @@
#include "builtin.h"
#include "environment.h"
+#include "hash.h"
#include "hex.h"
#include "odb.h"
#include "parse-options.h"
@@ -197,6 +198,18 @@ static int cmd_repo_info(int argc, const char **argv, const char *prefix,
return print_fields(argc, argv, repo, format);
}
+struct object_data {
+ struct object_id oid;
+ size_t value;
+};
+
+struct largest_objects {
+ struct object_data tag_size;
+ struct object_data commit_size;
+ struct object_data tree_size;
+ struct object_data blob_size;
+};
+
struct ref_stats {
size_t branches;
size_t remotes;
@@ -215,6 +228,7 @@ struct object_stats {
struct object_values type_counts;
struct object_values inflated_sizes;
struct object_values disk_sizes;
+ struct largest_objects largest;
};
struct repo_structure {
@@ -371,6 +385,21 @@ static void stats_table_setup_structure(struct stats_table *table,
" * %s", _("Blobs"));
stats_table_size_addf(table, objects->disk_sizes.tags,
" * %s", _("Tags"));
+
+ stats_table_addf(table, "");
+ stats_table_addf(table, "* %s", _("Largest objects"));
+ stats_table_addf(table, " * %s", _("Commits"));
+ stats_table_size_addf(table, objects->largest.commit_size.value,
+ " * %s", _("Maximum size"));
+ stats_table_addf(table, " * %s", _("Trees"));
+ stats_table_size_addf(table, objects->largest.tree_size.value,
+ " * %s", _("Maximum size"));
+ stats_table_addf(table, " * %s", _("Blobs"));
+ stats_table_size_addf(table, objects->largest.blob_size.value,
+ " * %s", _("Maximum size"));
+ stats_table_addf(table, " * %s", _("Tags"));
+ stats_table_size_addf(table, objects->largest.tag_size.value,
+ " * %s", _("Maximum size"));
}
static void stats_table_print_structure(const struct stats_table *table)
@@ -485,6 +514,23 @@ static void structure_keyvalue_print(struct repo_structure *stats,
printf("objects.tags.disk_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.disk_sizes.tags, value_delim);
+ printf("objects.commits.max_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.largest.commit_size.value, value_delim);
+ printf("objects.commits.max_size_oid%c%s%c", key_delim,
+ oid_to_hex(&stats->objects.largest.commit_size.oid), value_delim);
+ printf("objects.trees.max_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.largest.tree_size.value, value_delim);
+ printf("objects.trees.max_size_oid%c%s%c", key_delim,
+ oid_to_hex(&stats->objects.largest.tree_size.oid), value_delim);
+ printf("objects.blobs.max_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.largest.blob_size.value, value_delim);
+ printf("objects.blobs.max_size_oid%c%s%c", key_delim,
+ oid_to_hex(&stats->objects.largest.blob_size.oid), value_delim);
+ printf("objects.tags.max_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.largest.tag_size.value, value_delim);
+ printf("objects.tags.max_size_oid%c%s%c", key_delim,
+ oid_to_hex(&stats->objects.largest.tag_size.oid), value_delim);
+
fflush(stdout);
}
@@ -553,6 +599,15 @@ struct count_objects_data {
struct progress *progress;
};
+static void check_largest(struct object_data *data, struct object_id *oid,
+ size_t value)
+{
+ if (value > data->value) {
+ oidcpy(&data->oid, oid);
+ data->value = value;
+ }
+}
+
static int count_objects(const char *path UNUSED, struct oid_array *oids,
enum object_type type, void *cb_data)
{
@@ -578,21 +633,29 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
stats->type_counts.tags++;
stats->inflated_sizes.tags += inflated;
stats->disk_sizes.tags += disk;
+ check_largest(&stats->largest.tag_size, &oids->oid[i],
+ inflated);
break;
case OBJ_COMMIT:
stats->type_counts.commits++;
stats->inflated_sizes.commits += inflated;
stats->disk_sizes.commits += disk;
+ check_largest(&stats->largest.commit_size, &oids->oid[i],
+ inflated);
break;
case OBJ_TREE:
stats->type_counts.trees++;
stats->inflated_sizes.trees += inflated;
stats->disk_sizes.trees += disk;
+ check_largest(&stats->largest.tree_size, &oids->oid[i],
+ inflated);
break;
case OBJ_BLOB:
stats->type_counts.blobs++;
stats->inflated_sizes.blobs += inflated;
stats->disk_sizes.blobs += disk;
+ check_largest(&stats->largest.blob_size, &oids->oid[i],
+ inflated);
break;
default:
BUG("invalid object type");
diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh
index 17ff164b05..1999f325d0 100755
--- a/t/t1901-repo-structure.sh
+++ b/t/t1901-repo-structure.sh
@@ -52,6 +52,16 @@ test_expect_success 'empty repository' '
| * Trees | 0 B |
| * Blobs | 0 B |
| * Tags | 0 B |
+ | | |
+ | * Largest objects | |
+ | * Commits | |
+ | * Maximum size | 0 B |
+ | * Trees | |
+ | * Maximum size | 0 B |
+ | * Blobs | |
+ | * Maximum size | 0 B |
+ | * Tags | |
+ | * Maximum size | 0 B |
EOF
git repo structure >out 2>err &&
@@ -104,6 +114,16 @@ test_expect_success SHA1 'repository with references and objects' '
| * Trees | $(object_type_disk_usage tree true) |
| * Blobs | $(object_type_disk_usage blob true) |
| * Tags | $(object_type_disk_usage tag) B |
+ | | |
+ | * Largest objects | |
+ | * Commits | |
+ | * Maximum size | 223 B |
+ | * Trees | |
+ | * Maximum size | 32.29 KiB |
+ | * Blobs | |
+ | * Maximum size | 13 B |
+ | * Tags | |
+ | * Maximum size | 132 B |
EOF
git repo structure >out 2>err &&
@@ -138,6 +158,14 @@ test_expect_success SHA1 'keyvalue and nul format' '
objects.trees.disk_size=$(object_type_disk_usage tree)
objects.blobs.disk_size=$(object_type_disk_usage blob)
objects.tags.disk_size=$(object_type_disk_usage tag)
+ objects.commits.max_size=221
+ objects.commits.max_size_oid=de3508174b5c2ace6993da67cae9be9069e2df39
+ objects.trees.max_size=1335
+ objects.trees.max_size_oid=09931deea9d81ec21300d3e13c74412f32eacec5
+ objects.blobs.max_size=11
+ objects.blobs.max_size_oid=eaeeedced46482bd4281fda5a5f05ce24854151f
+ objects.tags.max_size=132
+ objects.tags.max_size_oid=1ee0f2b16ea37d895dbe9dbd76cd2ac70446176c
EOF
git repo structure --format=keyvalue >out 2>err &&
--
2.53.0
next prev parent reply other threads:[~2026-02-23 17:41 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-03 22:17 [PATCH 0/5] builtin/repo: include largest object information Justin Tobler
2026-02-03 22:17 ` [PATCH 1/5] builtin/repo: update stats for each object Justin Tobler
2026-02-03 22:36 ` Junio C Hamano
2026-02-18 19:40 ` Justin Tobler
2026-02-26 19:20 ` Junio C Hamano
2026-02-26 19:29 ` Justin Tobler
2026-02-03 22:17 ` [PATCH 2/5] builtin/repo: collect largest inflated objects Justin Tobler
2026-02-03 22:45 ` Junio C Hamano
2026-02-18 20:01 ` Justin Tobler
2026-02-03 22:17 ` [PATCH 3/5] builtin/repo: add OID annotations to table output Justin Tobler
2026-02-13 13:14 ` Patrick Steinhardt
2026-02-18 20:13 ` Justin Tobler
2026-02-03 22:17 ` [PATCH 4/5] builtin/repo: find commit with most parents Justin Tobler
2026-02-03 22:48 ` Junio C Hamano
2026-02-03 23:14 ` Kristoffer Haugsbakk
2026-02-03 23:33 ` Junio C Hamano
2026-02-18 20:06 ` Justin Tobler
2026-02-03 22:17 ` [PATCH 5/5] builtin/repo: find tree with most entries Justin Tobler
2026-02-03 22:50 ` Junio C Hamano
2026-02-04 8:28 ` Patrick Steinhardt
2026-02-04 15:28 ` Junio C Hamano
2026-02-23 17:41 ` [PATCH v2 0/5] builtin/repo: include largest object information Justin Tobler
2026-02-23 17:41 ` [PATCH v2 1/5] builtin/repo: update stats for each object Justin Tobler
2026-02-23 17:41 ` Justin Tobler [this message]
2026-02-26 19:50 ` [PATCH v2 2/5] builtin/repo: collect largest inflated objects Junio C Hamano
2026-03-02 17:28 ` Justin Tobler
2026-02-28 23:36 ` Lucas Seiki Oshiro
2026-03-02 17:38 ` Justin Tobler
2026-02-23 17:41 ` [PATCH v2 3/5] builtin/repo: add OID annotations to table output Justin Tobler
2026-02-26 19:56 ` Junio C Hamano
2026-03-02 17:39 ` Justin Tobler
2026-02-23 17:41 ` [PATCH v2 4/5] builtin/repo: find commit with most parents Justin Tobler
2026-02-23 17:41 ` [PATCH v2 5/5] builtin/repo: find tree with most entries Justin Tobler
2026-02-24 9:35 ` [PATCH v2 0/5] builtin/repo: include largest object information Patrick Steinhardt
2026-02-28 23:43 ` Lucas Seiki Oshiro
2026-03-01 19:22 ` Justin Tobler
2026-03-02 21:45 ` [PATCH v3 0/6] " Justin Tobler
2026-03-02 21:45 ` [PATCH v3 1/6] builtin/repo: update stats for each object Justin Tobler
2026-03-02 21:45 ` [PATCH v3 2/6] builtin/repo: add helper for printing keyvalue output Justin Tobler
2026-03-03 13:27 ` Patrick Steinhardt
2026-03-03 17:40 ` Junio C Hamano
2026-03-03 18:08 ` Justin Tobler
2026-03-02 21:45 ` [PATCH v3 3/6] builtin/repo: collect largest inflated objects Justin Tobler
2026-03-03 13:27 ` Patrick Steinhardt
2026-03-02 21:45 ` [PATCH v3 4/6] builtin/repo: add OID annotations to table output Justin Tobler
2026-03-02 21:45 ` [PATCH v3 5/6] builtin/repo: find commit with most parents Justin Tobler
2026-03-02 21:45 ` [PATCH v3 6/6] builtin/repo: find tree with most entries Justin Tobler
2026-03-02 22:09 ` [PATCH v3 0/6] builtin/repo: include largest object information Junio C Hamano
2026-03-06 22:36 ` Junio C Hamano
2026-03-08 18:44 ` Justin Tobler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260223174120.2356504-3-jltobler@gmail.com \
--to=jltobler@gmail.com \
--cc=eslam.reda.div@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=kristofferhaugsbakk@fastmail.com \
--cc=ps@pks.im \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox