From: "Eslam reda ragheb via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Karthik Nayak <karthik.188@gmail.com>,
Justin Tobler <jltobler@gmail.com>,
Ayush Chandekar <ayu.chandekar@gmail.com>,
Siddharth Asthana <siddharthasthana31@gmail.com>,
Lucas Seiki Oshiro <lucasseikioshiro@gmail.com>,
eslam reda <eslam.reda.div@gmail.com>,
Eslam reda ragheb <eslam.reda.div@gmail.com>
Subject: [PATCH v2 5/9] repo: add structure topology and path-depth metrics
Date: Mon, 23 Feb 2026 14:21:05 +0000 [thread overview]
Message-ID: <4b502925c9d70d37e0752fadfe061f1cdf692488.1771856469.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2208.v2.git.git.1771856469.gitgitgadget@gmail.com>
From: Eslam reda ragheb <eslam.reda.div@gmail.com>
Track additional structure-oriented maxima that are useful when
diagnosing unusually complex histories.
These include commit parent fanout, tree entry count, blob path
length/depth, and annotated tag chain depth.
The counters are gathered while traversing reachable objects and
are reported in both table and keyvalue output.
This lets both humans and scripts consume the same topology
signals.
Signed-off-by: Eslam reda ragheb <eslam.reda.div@gmail.com>
---
builtin/repo.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 170 insertions(+), 1 deletion(-)
diff --git a/builtin/repo.c b/builtin/repo.c
index a2fc3fd8cc..f92c209469 100644
--- a/builtin/repo.c
+++ b/builtin/repo.c
@@ -17,6 +17,7 @@
#include "string-list.h"
#include "shallow.h"
#include "submodule.h"
+#include "tree-walk.h"
#include "utf8.h"
static const char *const repo_usage[] = {
@@ -429,6 +430,11 @@ struct object_stats {
struct object_values max_inflated_sizes;
struct object_values disk_sizes;
struct object_values max_disk_sizes;
+ size_t max_commit_parent_count;
+ size_t max_tree_entry_count;
+ size_t max_blob_path_length;
+ size_t max_blob_path_depth;
+ size_t max_tag_chain_depth;
};
struct repo_structure {
@@ -545,6 +551,116 @@ static inline size_t get_max_object_value(struct object_values *values)
return max;
}
+static size_t get_commit_parent_count(struct repository *repo,
+ const struct object_id *oid)
+{
+ unsigned long size = 0;
+ const char *cur;
+ const char *end;
+ void *buf;
+ size_t count = 0;
+
+ buf = odb_read_object_peeled(repo->objects, oid, OBJ_COMMIT, &size, NULL);
+ if (!buf)
+ return 0;
+
+ cur = buf;
+ end = cur + size;
+ while (cur < end) {
+ const char *newline = memchr(cur, '\n', end - cur);
+ size_t line_len;
+
+ if (!newline)
+ break;
+ line_len = newline - cur;
+ if (!line_len)
+ break;
+
+ if (line_len > 7 && !memcmp(cur, "parent ", 7))
+ count++;
+
+ cur = newline + 1;
+ }
+
+ free(buf);
+ return count;
+}
+
+static size_t get_tree_entry_count(struct repository *repo,
+ const struct object_id *oid)
+{
+ struct tree_desc desc;
+ struct name_entry entry;
+ unsigned long size = 0;
+ void *buf;
+ size_t count = 0;
+
+ buf = odb_read_object_peeled(repo->objects, oid, OBJ_TREE, &size, NULL);
+ if (!buf)
+ return 0;
+
+ init_tree_desc(&desc, oid, buf, size);
+ while (tree_entry(&desc, &entry))
+ count++;
+
+ free(buf);
+ return count;
+}
+
+static size_t get_path_depth(const char *path)
+{
+ size_t depth = 0;
+
+ if (!path || !*path)
+ return 0;
+
+ depth = 1;
+ for (const char *cur = path; *cur; cur++)
+ if (*cur == '/')
+ depth++;
+
+ return depth;
+}
+
+static size_t get_tag_chain_depth(struct repository *repo,
+ const struct object_id *oid)
+{
+ struct object_id current = *oid;
+ size_t depth = 0;
+
+ while (1) {
+ enum object_type type;
+ unsigned long size = 0;
+ struct object_id next;
+ const char *p, *end;
+ void *buf = odb_read_object(repo->objects, &current, &type, &size);
+
+ if (!buf)
+ break;
+ if (type != OBJ_TAG) {
+ free(buf);
+ break;
+ }
+
+ p = buf;
+ if (!skip_prefix(p, "object ", &p) ||
+ parse_oid_hex_algop(p, &next, &end, repo->hash_algo) ||
+ *end != '\n') {
+ free(buf);
+ break;
+ }
+
+ depth++;
+ free(buf);
+
+ if (oideq(&next, &current))
+ break;
+ oidcpy(&current, &next);
+ }
+
+ return depth;
+}
+
static void stats_table_setup_structure(struct stats_table *table,
struct repo_structure *stats)
{
@@ -619,6 +735,17 @@ static void stats_table_setup_structure(struct stats_table *table,
" * %s", _("Blobs"));
stats_table_size_addf(table, objects->max_disk_sizes.tags,
" * %s", _("Tags"));
+
+ stats_table_count_addf(table, objects->max_commit_parent_count,
+ " * %s", _("Largest parent count"));
+ stats_table_count_addf(table, objects->max_tree_entry_count,
+ " * %s", _("Largest tree entries"));
+ stats_table_count_addf(table, objects->max_blob_path_length,
+ " * %s", _("Longest blob path"));
+ stats_table_count_addf(table, objects->max_blob_path_depth,
+ " * %s", _("Deepest blob path"));
+ stats_table_count_addf(table, objects->max_tag_chain_depth,
+ " * %s", _("Deepest tag chain"));
}
static void stats_table_print_structure(const struct stats_table *table)
@@ -749,6 +876,17 @@ static void structure_keyvalue_print(struct repo_structure *stats,
printf("objects.tags.max_disk_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.max_disk_sizes.tags, value_delim);
+ printf("objects.commits.max_parent_count%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_commit_parent_count, value_delim);
+ printf("objects.trees.max_entry_count%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_tree_entry_count, value_delim);
+ printf("objects.blobs.max_path_length%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_blob_path_length, value_delim);
+ printf("objects.blobs.max_path_depth%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_blob_path_depth, value_delim);
+ printf("objects.tags.max_chain_depth%c%" PRIuMAX "%c", key_delim,
+ (uintmax_t)stats->objects.max_tag_chain_depth, value_delim);
+
printf("objects.commits.disk_size%c%" PRIuMAX "%c", key_delim,
(uintmax_t)stats->objects.disk_sizes.commits, value_delim);
printf("objects.trees.disk_size%c%" PRIuMAX "%c", key_delim,
@@ -826,7 +964,7 @@ struct count_objects_data {
struct progress *progress;
};
-static int count_objects(const char *path UNUSED, struct oid_array *oids,
+static int count_objects(const char *path, struct oid_array *oids,
enum object_type type, void *cb_data)
{
struct count_objects_data *data = cb_data;
@@ -862,6 +1000,13 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
switch (type) {
case OBJ_TAG:
+ for (size_t i = 0; i < oids->nr; i++) {
+ size_t tag_chain_depth = get_tag_chain_depth(data->odb->repo,
+ &oids->oid[i]);
+ if (tag_chain_depth > stats->max_tag_chain_depth)
+ stats->max_tag_chain_depth = tag_chain_depth;
+ }
+
stats->type_counts.tags += oids->nr;
stats->inflated_sizes.tags += inflated_total;
if (max_inflated > stats->max_inflated_sizes.tags)
@@ -871,6 +1016,13 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
stats->max_disk_sizes.tags = max_disk;
break;
case OBJ_COMMIT:
+ for (size_t i = 0; i < oids->nr; i++) {
+ size_t parent_count = get_commit_parent_count(data->odb->repo,
+ &oids->oid[i]);
+ if (parent_count > stats->max_commit_parent_count)
+ stats->max_commit_parent_count = parent_count;
+ }
+
stats->type_counts.commits += oids->nr;
stats->inflated_sizes.commits += inflated_total;
if (max_inflated > stats->max_inflated_sizes.commits)
@@ -880,6 +1032,13 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
stats->max_disk_sizes.commits = max_disk;
break;
case OBJ_TREE:
+ for (size_t i = 0; i < oids->nr; i++) {
+ size_t entry_count = get_tree_entry_count(data->odb->repo,
+ &oids->oid[i]);
+ if (entry_count > stats->max_tree_entry_count)
+ stats->max_tree_entry_count = entry_count;
+ }
+
stats->type_counts.trees += oids->nr;
stats->inflated_sizes.trees += inflated_total;
if (max_inflated > stats->max_inflated_sizes.trees)
@@ -889,6 +1048,16 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
stats->max_disk_sizes.trees = max_disk;
break;
case OBJ_BLOB:
+ if (path && *path) {
+ size_t path_len = strlen(path);
+ size_t path_depth = get_path_depth(path);
+
+ if (path_len > stats->max_blob_path_length)
+ stats->max_blob_path_length = path_len;
+ if (path_depth > stats->max_blob_path_depth)
+ stats->max_blob_path_depth = path_depth;
+ }
+
stats->type_counts.blobs += oids->nr;
stats->inflated_sizes.blobs += inflated_total;
if (max_inflated > stats->max_inflated_sizes.blobs)
--
gitgitgadget
next prev parent reply other threads:[~2026-02-23 14:21 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-22 18:28 [PATCH 0/3] repo: extend info path reporting and structure statistics eslam reda via GitGitGadget
2026-02-22 18:28 ` [PATCH 1/3] repo: extend info paths " eslam-reda-div via GitGitGadget
2026-02-22 20:35 ` Lucas Seiki Oshiro
2026-02-23 3:02 ` Justin Tobler
2026-02-22 18:28 ` [PATCH 2/3] t1900,t1901: make repo tests hash-agnostic and wc-portable Eslam reda ragheb via GitGitGadget
2026-02-22 20:46 ` Lucas Seiki Oshiro
2026-02-22 18:28 ` [PATCH 3/3] t1900,t1901: fix test portability issues Eslam reda ragheb via GitGitGadget
2026-02-22 22:37 ` [PATCH 0/3] repo: extend info path reporting and structure statistics Junio C Hamano
2026-02-23 14:21 ` [PATCH v2 0/9] " eslam reda via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 1/9] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 2/9] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 4/9] repo: add structure max object size metrics Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` Eslam reda ragheb via GitGitGadget [this message]
2026-02-23 14:21 ` [PATCH v2 6/9] repo: add aggregate structure totals to keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 7/9] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 8/9] t1901: extend structure metric coverage and portability Eslam reda ragheb via GitGitGadget
2026-02-23 14:21 ` [PATCH v2 9/9] docs: describe repo info path keys and structure metrics Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 0/5] repo: extend info path reporting and structure statistics eslam reda via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 1/5] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 2/5] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 3/5] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 4/5] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-23 19:43 ` [PATCH v3 5/5] docs: describe repo info path keys Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 00/10] repo info: add category/path keys and --path-format eslam reda via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 01/10] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-26 23:21 ` Junio C Hamano
2026-02-26 21:14 ` [PATCH v4 02/10] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-26 23:29 ` Junio C Hamano
2026-02-27 9:04 ` Phillip Wood
2026-02-27 19:51 ` Junio C Hamano
2026-03-01 10:36 ` Phillip Wood
2026-03-02 6:42 ` Junio C Hamano
2026-02-26 21:14 ` [PATCH v4 03/10] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 04/10] repo: add structure max object size metrics Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 05/10] repo: add structure topology and path-depth metrics Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 06/10] repo: add aggregate structure totals to keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 07/10] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 08/10] t1901: extend structure metric coverage and portability Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 09/10] docs: describe repo info path keys and structure metrics Eslam reda ragheb via GitGitGadget
2026-02-26 21:14 ` [PATCH v4 10/10] repo: reduce repetition in structure keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 00/11] repo info: add category/path keys and --path-format eslam reda via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 01/11] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-27 21:42 ` Lucas Seiki Oshiro
2026-02-27 19:30 ` [PATCH v5 02/11] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 03/11] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 04/11] repo: add structure max object size metrics Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 05/11] repo: add structure topology and path-depth metrics Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 06/11] repo: add aggregate structure totals to keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 07/11] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 08/11] t1901: extend structure metric coverage and portability Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 09/11] docs: describe repo info path keys and structure metrics Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 10/11] repo: reduce repetition in structure keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30 ` [PATCH v5 11/11] repo: refine path keys for repo info Eslam reda ragheb via GitGitGadget
2026-03-01 10:33 ` Phillip Wood
2026-02-27 21:52 ` [PATCH v5 00/11] repo info: add category/path keys and --path-format Lucas Seiki Oshiro
2026-03-02 5:15 ` [PATCH v6 0/6] " eslam reda via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 1/6] repo: introduce repo_info context plumbing Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 2/6] repo: support category requests in repo info Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 3/6] repo: add path keys to " Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 4/6] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 5/6] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-03-02 5:15 ` [PATCH v6 6/6] docs: describe repo info path keys Eslam reda ragheb via GitGitGadget
2026-03-18 20:44 ` [PATCH v6 0/6] repo info: add category/path keys and --path-format Jialong Wang
2026-03-19 3:36 ` K Jayatheerth
2026-03-19 20:32 ` Jerry Wang
2026-03-20 1:49 ` K Jayatheerth
2026-03-19 20:58 ` [PATCH v6 5/6] t1900: cover repo info path keys and path-format Jialong Wang
2026-03-19 20:59 ` [PATCH] t1900: cover repo info path keys in non-default layouts Jialong Wang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4b502925c9d70d37e0752fadfe061f1cdf692488.1771856469.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=ayu.chandekar@gmail.com \
--cc=eslam.reda.div@gmail.com \
--cc=git@vger.kernel.org \
--cc=jltobler@gmail.com \
--cc=karthik.188@gmail.com \
--cc=lucasseikioshiro@gmail.com \
--cc=siddharthasthana31@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox