From: "Eslam reda ragheb via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Phillip Wood <phillip.wood123@gmail.com>,
eslam reda <eslam.reda.div@gmail.com>,
Eslam reda ragheb <eslam.reda.div@gmail.com>
Subject: [PATCH v5 04/11] repo: add structure max object size metrics
Date: Fri, 27 Feb 2026 19:30:33 +0000 [thread overview]
Message-ID: <504d9cf7a0dbd663ea88c75217e1564504a60937.1772220640.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2208.v5.git.git.1772220640.gitgitgadget@gmail.com>
From: Eslam reda ragheb <eslam.reda.div@gmail.com>
Extend "git repo structure" to report the maximum inflated and on-disk
object sizes, both per object type and as overall maxima.
This complements existing totals by highlighting outliers that
often drive repository bloat analysis.
The implementation updates object counting to track per-type maxima
while walking reachable objects.
It exposes those values in both the table format, for human readers,
and the keyvalue format, for scripts.
Signed-off-by: Eslam reda ragheb <eslam.reda.div@gmail.com>
---
builtin/repo.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 86 insertions(+), 1 deletion(-)
diff --git a/builtin/repo.c b/builtin/repo.c
index e5078e5459..a2fc3fd8cc 100644
--- a/builtin/repo.c
+++ b/builtin/repo.c
@@ -426,7 +426,9 @@ struct object_values {
struct object_stats {
struct object_values type_counts;
struct object_values inflated_sizes;
+ struct object_values max_inflated_sizes;
struct object_values disk_sizes;
+ struct object_values max_disk_sizes;
};
struct repo_structure {
@@ -529,6 +531,20 @@ static inline size_t get_total_object_values(struct object_values *values)
return values->tags + values->commits + values->trees + values->blobs;
}
+static inline size_t get_max_object_value(struct object_values *values)
+{
+ size_t max = values->commits;
+
+ if (values->trees > max)
+ max = values->trees;
+ if (values->blobs > max)
+ max = values->blobs;
+ if (values->tags > max)
+ max = values->tags;
+
+ return max;
+}
+
static void stats_table_setup_structure(struct stats_table *table,
struct repo_structure *stats)
{
@@ -583,6 +599,26 @@ static void stats_table_setup_structure(struct stats_table *table,
" * %s", _("Blobs"));
stats_table_size_addf(table, objects->disk_sizes.tags,
" * %s", _("Tags"));
+
+ stats_table_size_addf(table, objects->max_inflated_sizes.commits,
+ " * %s", _("Largest commit"));
+ stats_table_size_addf(table, objects->max_inflated_sizes.trees,
+ " * %s", _("Largest tree"));
+ stats_table_size_addf(table, objects->max_inflated_sizes.blobs,
+ " * %s", _("Largest blob"));
+ stats_table_size_addf(table, objects->max_inflated_sizes.tags,
+ " * %s", _("Largest tag"));
+
+ stats_table_size_addf(table, get_max_object_value(&objects->max_disk_sizes),
+ " * %s", _("Largest disk size"));
+ stats_table_size_addf(table, objects->max_disk_sizes.commits,
+ " * %s", _("Commits"));
+ stats_table_size_addf(table, objects->max_disk_sizes.trees,
+ " * %s", _("Trees"));
+ stats_table_size_addf(table, objects->max_disk_sizes.blobs,
+ " * %s", _("Blobs"));
+ stats_table_size_addf(table, objects->max_disk_sizes.tags,
+ " * %s", _("Tags"));
}
static void stats_table_print_structure(const struct stats_table *table)
@@ -661,6 +697,9 @@ static void stats_table_clear(struct stats_table *table)
static void structure_keyvalue_print(struct repo_structure *stats,
char key_delim, char value_delim)
{
+ size_t max_inflated_size = get_max_object_value(&stats->objects.max_inflated_sizes);
+ size_t max_disk_size = get_max_object_value(&stats->objects.max_disk_sizes);
+
printf("references.branches.count%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->refs.branches, value_delim);
printf("references.tags.count%c%" PRIuMAX "%c", key_delim,
@@ -688,6 +727,28 @@ static void structure_keyvalue_print(struct repo_structure *stats,
printf("objects.tags.inflated_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.inflated_sizes.tags, value_delim);
+ printf("objects.max_inflated_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)max_inflated_size, value_delim);
+ printf("objects.commits.max_inflated_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_inflated_sizes.commits, value_delim);
+ printf("objects.trees.max_inflated_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_inflated_sizes.trees, value_delim);
+ printf("objects.blobs.max_inflated_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_inflated_sizes.blobs, value_delim);
+ printf("objects.tags.max_inflated_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_inflated_sizes.tags, value_delim);
+
+ printf("objects.max_disk_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)max_disk_size, value_delim);
+ printf("objects.commits.max_disk_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_disk_sizes.commits, value_delim);
+ printf("objects.trees.max_disk_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_disk_sizes.trees, value_delim);
+ printf("objects.blobs.max_disk_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_disk_sizes.blobs, value_delim);
+ printf("objects.tags.max_disk_size%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_disk_sizes.tags, value_delim);
+
printf("objects.commits.disk_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.disk_sizes.commits, value_delim);
printf("objects.trees.disk_size%c%" PRIuMAX "%c", key_delim,
@@ -772,6 +833,8 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
struct object_stats *stats = data->stats;
size_t inflated_total = 0;
size_t disk_total = 0;
+ size_t max_inflated = 0;
+ size_t max_disk = 0;
size_t object_count;
for (size_t i = 0; i < oids->nr; i++) {
@@ -786,31 +849,53 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
OBJECT_INFO_SKIP_FETCH_OBJECT |
OBJECT_INFO_QUICK) < 0)
continue;
+ if (disk < 0)
+ continue;
inflated_total += inflated;
- disk_total += disk;
+ disk_total += (size_t)disk;
+ if (inflated > max_inflated)
+ max_inflated = inflated;
+ if ((size_t)disk > max_disk)
+ max_disk = (size_t)disk;
}
switch (type) {
case OBJ_TAG:
stats->type_counts.tags += oids->nr;
stats->inflated_sizes.tags += inflated_total;
+ if (max_inflated > stats->max_inflated_sizes.tags)
+ stats->max_inflated_sizes.tags = max_inflated;
stats->disk_sizes.tags += disk_total;
+ if (max_disk > stats->max_disk_sizes.tags)
+ stats->max_disk_sizes.tags = max_disk;
break;
case OBJ_COMMIT:
stats->type_counts.commits += oids->nr;
stats->inflated_sizes.commits += inflated_total;
+ if (max_inflated > stats->max_inflated_sizes.commits)
+ stats->max_inflated_sizes.commits = max_inflated;
stats->disk_sizes.commits += disk_total;
+ if (max_disk > stats->max_disk_sizes.commits)
+ stats->max_disk_sizes.commits = max_disk;
break;
case OBJ_TREE:
stats->type_counts.trees += oids->nr;
stats->inflated_sizes.trees += inflated_total;
+ if (max_inflated > stats->max_inflated_sizes.trees)
+ stats->max_inflated_sizes.trees = max_inflated;
stats->disk_sizes.trees += disk_total;
+ if (max_disk > stats->max_disk_sizes.trees)
+ stats->max_disk_sizes.trees = max_disk;
break;
case OBJ_BLOB:
stats->type_counts.blobs += oids->nr;
stats->inflated_sizes.blobs += inflated_total;
+ if (max_inflated > stats->max_inflated_sizes.blobs)
+ stats->max_inflated_sizes.blobs = max_inflated;
stats->disk_sizes.blobs += disk_total;
+ if (max_disk > stats->max_disk_sizes.blobs)
+ stats->max_disk_sizes.blobs = max_disk;
break;
default:
BUG("invalid object type");
--
gitgitgadget
next prev parent reply other threads:[~2026-02-27 19:30 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-22 18:28 [PATCH 0/3] repo: extend info path reporting and structure statistics eslam reda via GitGitGadget
2026-02-22 18:28 ` [PATCH 1/3] repo: extend info paths " eslam-reda-div via GitGitGadget
2026-02-22 20:35 ` Lucas Seiki Oshiro
2026-02-23 3:02 ` Justin Tobler
2026-02-22 18:28 ` [PATCH 2/3] t1900,t1901: make repo tests hash-agnostic and wc-portable Eslam reda ragheb via GitGitGadget
2026-02-22 20:46 ` Lucas Seiki Oshiro
2026-02-22 18:28 ` [PATCH 3/3] t1900,t1901: fix test portability issues Eslam reda ragheb via GitGitGadget
2026-02-22 22:37 ` [PATCH 0/3] repo: extend info path reporting and structure statistics Junio C Hamano
2026-02-23 14:21 ` [PATCH v2 0/9] " eslam reda via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 1/9] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 2/9] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 4/9] repo: add structure max object size metrics Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 5/9] repo: add structure topology and path-depth metrics Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 6/9] repo: add aggregate structure totals to keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 7/9] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 8/9] t1901: extend structure metric coverage and portability Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 9/9] docs: describe repo info path keys and structure metrics Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 0/5] repo: extend info path reporting and structure statistics eslam reda via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 1/5] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 2/5] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 3/5] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 4/5] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 5/5] docs: describe repo info path keys Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 00/10] repo info: add category/path keys and --path-format eslam reda via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 01/10] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-26 23:21 ` Junio C Hamano
2026-02-26 21:14 ` [PATCH v4 02/10] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-26 23:29 ` Junio C Hamano
2026-02-27 9:04 ` Phillip Wood
2026-02-27 19:51 ` Junio C Hamano
2026-03-01 10:36 ` Phillip Wood
2026-03-02 6:42 ` Junio C Hamano
2026-02-26 21:14 ` [PATCH v4 03/10] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 04/10] repo: add structure max object size metrics Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 05/10] repo: add structure topology and path-depth metrics Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 06/10] repo: add aggregate structure totals to keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 07/10] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 08/10] t1901: extend structure metric coverage and portability Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 09/10] docs: describe repo info path keys and structure metrics Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 10/10] repo: reduce repetition in structure keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 00/11] repo info: add category/path keys and --path-format eslam reda via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 01/11] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-27 21:42 ` Lucas Seiki Oshiro
2026-02-27 19:30 ` [PATCH v5 02/11] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 03/11] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` Eslam reda ragheb via GitGitGadget [this message]
2026-02-27 19:30 ` [PATCH v5 05/11] repo: add structure topology and path-depth metrics Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 06/11] repo: add aggregate structure totals to keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 07/11] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 08/11] t1901: extend structure metric coverage and portability Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 09/11] docs: describe repo info path keys and structure metrics Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 10/11] repo: reduce repetition in structure keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 11/11] repo: refine path keys for repo info Eslam reda ragheb via GitGitGadget
2026-03-01 10:33 ` Phillip Wood
2026-02-27 21:52 ` [PATCH v5 00/11] repo info: add category/path keys and --path-format Lucas Seiki Oshiro
2026-03-02 5:15 ` [PATCH v6 0/6] " eslam reda via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 1/6] repo: introduce repo_info context plumbing Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 2/6] repo: support category requests in repo info Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 3/6] repo: add path keys to " Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 4/6] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 5/6] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 6/6] docs: describe repo info path keys Eslam reda ragheb via GitGitGadget
2026-03-18 20:44 ` [PATCH v6 0/6] repo info: add category/path keys and --path-format Jialong Wang
2026-03-19 3:36 ` K Jayatheerth
2026-03-19 20:32 ` Jerry Wang
2026-03-20 1:49 ` K Jayatheerth
2026-03-19 20:58 ` [PATCH v6 5/6] t1900: cover repo info path keys and path-format Jialong Wang
2026-03-19 20:59 ` [PATCH] t1900: cover repo info path keys in non-default layouts Jialong Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=504d9cf7a0dbd663ea88c75217e1564504a60937.1772220640.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=eslam.reda.div@gmail.com \
--cc=git@vger.kernel.org \
--cc=phillip.wood123@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox