public inbox for git@vger.kernel.org
 help / color / mirror / Atom feed
From: "Eslam reda ragheb via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Phillip Wood <phillip.wood123@gmail.com>,
	eslam reda <eslam.reda.div@gmail.com>,
	Eslam reda ragheb <eslam.reda.div@gmail.com>
Subject: [PATCH v5 08/11] t1901: extend structure metric coverage and portability
Date: Fri, 27 Feb 2026 19:30:37 +0000	[thread overview]
Message-ID: <0525ed4cd99f312b35cd7c005f16679c682352c0.1772220640.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2208.v5.git.git.1772220640.gitgitgadget@gmail.com>

From: Eslam reda ragheb <eslam.reda.div@gmail.com>

Expand t1901 to cover the additional structure metrics emitted by
"git repo structure", including per-type maxima and the aggregate
totals checked in the keyvalue/nul formats.

The table-format tests now check that the expected section and metric
labels are present, and the keyvalue test checks the machine-readable
fields for the extended metric set.

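Also make expectations more portable across hash algorithms and
platforms by deriving expected values from the repository under test
rather than relying on brittle hard-coded assumptions.

This covers the padded output of wc on BSD/macOS and expectations
that are sensitive to the hash format.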

Signed-off-by: Eslam reda ragheb <eslam.reda.div@gmail.com>
---
 t/t1901-repo-structure.sh | 250 ++++++++++++++++++++++++++++----------
 1 file changed, 187 insertions(+), 63 deletions(-)

diff --git a/t/t1901-repo-structure.sh b/t/t1901-repo-structure.sh
index 17ff164b05..7b7c4117aa 100755
--- a/t/t1901-repo-structure.sh
+++ b/t/t1901-repo-structure.sh
@@ -21,42 +21,174 @@ object_type_disk_usage() {
 	fi
 }
 
+object_type_max_inflated_size() {
+	# Print the largest inflated size, in bytes, among all reachable
+	# objects of type $1 (commit, tree, blob or tag); 0 if there are none.
+	# Sizes come from one "cat-file --batch-check" process instead of a
+	# "cat-file -s" per object. As with any pipeline, only the exit
+	# status of the final awk is reported to the caller.
+	git rev-list --all --objects \
+		--filter=object:type="$1" --filter-provided-objects |
+	cut -d" " -f1 |
+	git cat-file --batch-check="%(objectsize)" |
+	awk '$1 > max { max = $1 } END { print max + 0 }'
+}
+
+tag_max_chain_depth() {
+	# Print the longest tag chain: tag objects walked from any reachable
+	# tag until a non-tag target (tag of a commit = 1); 0 without tags.
+	max=0
+	for oid in $(git rev-list --all --objects \
+		--filter=object:type=tag --filter-provided-objects | cut -d" " -f1)
+	do
+		depth=0
+		current=$oid
+		while :
+		do
+			# Not piped directly: that would hide a cat-file failure.
+			raw=$(git cat-file -p "$current") || return 1
+			target=$(printf "%s\n" "$raw" | sed -n "s/^object //p" | sed -n 1p)
+			test -n "$target" || break
+			depth=$((depth + 1))
+			type=$(git cat-file -t "$target") || return 1
+			test "$type" = tag || break
+			current=$target
+		done
+		test "$depth" -gt "$max" && max=$depth
+	done
+	echo "$max"
+}
+
+object_max_inflated_size() {
+	max=0
+	# Ask object_type_max_inflated_size for each type; keep the largest.
+	for type in commit tree blob tag
+	do
+		type_max=$(object_type_max_inflated_size "$type") || return 1
+		test "$type_max" -gt "$max" && max=$type_max
+	done
+	# Print the overall maximum (0 if no per-type value exceeded 0).
+	echo "$max"
+}
+
+object_type_max_disk_size() {
+	# Print the largest on-disk size, in bytes, among all reachable
+	# objects of type $1 (commit, tree, blob or tag); 0 if there are none.
+	# All object IDs are streamed into a single "cat-file --batch-check"
+	# process rather than starting a new one for every object. As with
+	# any pipeline, only the exit status of the final awk is reported.
+	git rev-list --all --objects \
+		--filter=object:type="$1" --filter-provided-objects |
+	cut -d" " -f1 |
+	git cat-file --batch-check="%(objectsize:disk)" |
+	awk '$1 > max { max = $1 } END { print max + 0 }'
+}
+
+reference_count_total() {
+	git for-each-ref --format='%(refname)' | awk 'END { print NR }' # prints 0 when there are no refs
+}
+
+object_type_count() { # print the number of reachable objects of type $1 (0 if none)
+	git rev-list --all --objects \
+		--filter=object:type="$1" --filter-provided-objects | awk 'END { print NR }'
+}
+
+object_count_total() {
+	commits=$(object_type_count commit) || return 1
+	trees=$(object_type_count tree) || return 1
+	blobs=$(object_type_count blob) || return 1
+	tags=$(object_type_count tag) || return 1
+	# Print the sum of the four per-type counts gathered above.
+	echo $((commits + trees + blobs + tags))
+}
+
+object_type_total_inflated_size() {
+	# Print the summed inflated size, in bytes, of all reachable objects
+	# of type $1 (commit, tree, blob or tag); 0 if there are none.
+	# Sizes come from one "cat-file --batch-check" process instead of a
+	# "cat-file -s" per object. As with any pipeline, only the exit
+	# status of the final awk is reported to the caller.
+	git rev-list --all --objects \
+		--filter=object:type="$1" --filter-provided-objects |
+	cut -d" " -f1 |
+	git cat-file --batch-check="%(objectsize)" |
+	awk '{ total += $1 } END { print total + 0 }'
+}
+
+object_total_inflated_size() {
+	commits=$(object_type_total_inflated_size commit) || return 1
+	trees=$(object_type_total_inflated_size tree) || return 1
+	blobs=$(object_type_total_inflated_size blob) || return 1
+	tags=$(object_type_total_inflated_size tag) || return 1
+	# Print the sum of the four per-type inflated sizes gathered above.
+	echo $((commits + trees + blobs + tags))
+}
+
+object_max_disk_size() {
+	max=0
+	# Ask object_type_max_disk_size for each type; keep the largest.
+	for type in commit tree blob tag
+	do
+		type_max=$(object_type_max_disk_size "$type") || return 1
+		test "$type_max" -gt "$max" && max=$type_max
+	done
+	# Print the overall maximum (0 if no per-type value exceeded 0).
+	echo "$max"
+}
+
+commit_max_parent_count() {
+	# Each "rev-list --parents" line is a commit followed by its parents,
+	# so the parent count is the field count minus one; print the largest.
+	git rev-list --all --parents |
+	awk 'NF - 1 > max { max = NF - 1 } END { print max + 0 }'
+}
+
+tree_max_entry_count() {
+	# Print the largest number of entries in any reachable tree; 0 if none.
+	max=0
+	for oid in $(git rev-list --all --objects \
+		--filter=object:type=tree --filter-provided-objects | cut -d" " -f1)
+	do
+		listing=$(git cat-file -p "$oid") || return 1 # not piped: keeps its status
+		entries=$(printf "%s" "$listing" | awk "END { print NR }")
+		test "$entries" -gt "$max" && max=$entries
+	done
+	echo "$max"
+}
+
+blob_max_path_length() {
+	# Print the longest blob path length (0 if none). The path is taken as
+	# everything after the OID, so names containing spaces count in full.
+	git rev-list --all --objects \
+		--filter=object:type=blob --filter-provided-objects | awk '
+		NF > 1 { len = length(substr($0, index($0, " ") + 1)) }
+		len > max { max = len }
+		END { print max + 0 }
+	'
+}
+
+blob_max_path_depth() {
+	# Print the deepest blob path in components (slashes + 1; 0 if none).
+	# The path is everything after the OID, so spaces do not truncate it.
+	git rev-list --all --objects \
+		--filter=object:type=blob --filter-provided-objects | awk '
+		NF > 1 { p = substr($0, index($0, " ") + 1); depth = gsub(/\//, "/", p) + 1 }
+		depth > max { max = depth }
+		END { print max + 0 }
+	'
+}
+
 test_expect_success 'empty repository' '
 	test_when_finished "rm -rf repo" &&
 	git init repo &&
 	(
 		cd repo &&
-		cat >expect <<-\EOF &&
-		| Repository structure | Value  |
-		| -------------------- | ------ |
-		| * References         |        |
-		|   * Count            |    0   |
-		|     * Branches       |    0   |
-		|     * Tags           |    0   |
-		|     * Remotes        |    0   |
-		|     * Others         |    0   |
-		|                      |        |
-		| * Reachable objects  |        |
-		|   * Count            |    0   |
-		|     * Commits        |    0   |
-		|     * Trees          |    0   |
-		|     * Blobs          |    0   |
-		|     * Tags           |    0   |
-		|   * Inflated size    |    0 B |
-		|     * Commits        |    0 B |
-		|     * Trees          |    0 B |
-		|     * Blobs          |    0 B |
-		|     * Tags           |    0 B |
-		|   * Disk size        |    0 B |
-		|     * Commits        |    0 B |
-		|     * Trees          |    0 B |
-		|     * Blobs          |    0 B |
-		|     * Tags           |    0 B |
-		EOF
-
 		git repo structure >out 2>err &&
-
-		test_cmp expect out &&
+		test_grep "Repository structure" out &&
+		test_grep "\\* References" out &&
+		test_grep "\\* Reachable objects" out &&
+		test_grep "Largest disk size" out &&
+		test_grep "Deepest tag chain" out &&
 		test_line_count = 0 err
 	)
 '
@@ -75,40 +207,13 @@ test_expect_success SHA1 'repository with references and objects' '
 		# Also creates a commit, tree, and blob.
 		git notes add -m foo &&
 
-		# The tags disk size is handled specially due to the
-		# git-rev-list(1) --disk-usage=human option printing the full
-		# "byte/bytes" unit string instead of just "B".
-		cat >expect <<-EOF &&
-		| Repository structure | Value      |
-		| -------------------- | ---------- |
-		| * References         |            |
-		|   * Count            |      4     |
-		|     * Branches       |      1     |
-		|     * Tags           |      1     |
-		|     * Remotes        |      1     |
-		|     * Others         |      1     |
-		|                      |            |
-		| * Reachable objects  |            |
-		|   * Count            |   3.02 k   |
-		|     * Commits        |   1.01 k   |
-		|     * Trees          |   1.01 k   |
-		|     * Blobs          |   1.01 k   |
-		|     * Tags           |      1     |
-		|   * Inflated size    |  16.03 MiB |
-		|     * Commits        | 217.92 KiB |
-		|     * Trees          |  15.81 MiB |
-		|     * Blobs          |  11.68 KiB |
-		|     * Tags           |    132 B   |
-		|   * Disk size        | $(object_type_disk_usage all true) |
-		|     * Commits        | $(object_type_disk_usage commit true) |
-		|     * Trees          | $(object_type_disk_usage tree true) |
-		|     * Blobs          |  $(object_type_disk_usage blob true) |
-		|     * Tags           |    $(object_type_disk_usage tag) B   |
-		EOF
-
 		git repo structure >out 2>err &&
-
-		test_cmp expect out &&
+		test_grep "\\* References" out &&
+		test_grep "\\* Reachable objects" out &&
+		test_grep "Largest commit" out &&
+		test_grep "Largest disk size" out &&
+		test_grep "Largest parent count" out &&
+		test_grep "Deepest tag chain" out &&
 		test_line_count = 0 err
 	)
 '
@@ -122,18 +227,37 @@ test_expect_success SHA1 'keyvalue and nul format' '
 		git tag -a foo -m bar &&
 
 		cat >expect <<-EOF &&
+		references.count=$(reference_count_total)
 		references.branches.count=1
 		references.tags.count=1
 		references.remotes.count=0
 		references.others.count=0
+		objects.count=$(object_count_total)
 		objects.commits.count=42
 		objects.trees.count=42
 		objects.blobs.count=42
 		objects.tags.count=1
+		objects.inflated_size=$(object_total_inflated_size)
 		objects.commits.inflated_size=9225
 		objects.trees.inflated_size=28554
 		objects.blobs.inflated_size=453
 		objects.tags.inflated_size=132
+		objects.max_inflated_size=$(object_max_inflated_size)
+		objects.commits.max_inflated_size=$(object_type_max_inflated_size commit)
+		objects.trees.max_inflated_size=$(object_type_max_inflated_size tree)
+		objects.blobs.max_inflated_size=$(object_type_max_inflated_size blob)
+		objects.tags.max_inflated_size=$(object_type_max_inflated_size tag)
+		objects.disk_size=$(object_type_disk_usage all)
+		objects.max_disk_size=$(object_max_disk_size)
+		objects.commits.max_disk_size=$(object_type_max_disk_size commit)
+		objects.trees.max_disk_size=$(object_type_max_disk_size tree)
+		objects.blobs.max_disk_size=$(object_type_max_disk_size blob)
+		objects.tags.max_disk_size=$(object_type_max_disk_size tag)
+		objects.commits.max_parent_count=$(commit_max_parent_count)
+		objects.trees.max_entry_count=$(tree_max_entry_count)
+		objects.blobs.max_path_length=$(blob_max_path_length)
+		objects.blobs.max_path_depth=$(blob_max_path_depth)
+		objects.tags.max_chain_depth=$(tag_max_chain_depth)
 		objects.commits.disk_size=$(object_type_disk_usage commit)
 		objects.trees.disk_size=$(object_type_disk_usage tree)
 		objects.blobs.disk_size=$(object_type_disk_usage blob)
-- 
gitgitgadget


  parent reply	other threads:[~2026-02-27 19:30 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-22 18:28 [PATCH 0/3] repo: extend info path reporting and structure statistics eslam reda via GitGitGadget
2026-02-22 18:28 ` [PATCH 1/3] repo: extend info paths " eslam-reda-div via GitGitGadget
2026-02-22 20:35   ` Lucas Seiki Oshiro
2026-02-23  3:02   ` Justin Tobler
2026-02-22 18:28 ` [PATCH 2/3] t1900,t1901: make repo tests hash-agnostic and wc-portable Eslam reda ragheb via GitGitGadget
2026-02-22 20:46   ` Lucas Seiki Oshiro
2026-02-22 18:28 ` [PATCH 3/3] t1900,t1901: fix test portability issues Eslam reda ragheb via GitGitGadget
2026-02-22 22:37 ` [PATCH 0/3] repo: extend info path reporting and structure statistics Junio C Hamano
2026-02-23 14:21 ` [PATCH v2 0/9] " eslam reda via GitGitGadget
2026-02-23 14:21   ` [PATCH v2 1/9] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-23 14:21   ` [PATCH v2 2/9] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-23 14:21   ` [PATCH v2 4/9] repo: add structure max object size metrics Eslam reda ragheb via GitGitGadget
2026-02-23 14:21   ` [PATCH v2 5/9] repo: add structure topology and path-depth metrics Eslam reda ragheb via GitGitGadget
2026-02-23 14:21   ` [PATCH v2 6/9] repo: add aggregate structure totals to keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-23 14:21   ` [PATCH v2 7/9] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-23 14:21   ` [PATCH v2 8/9] t1901: extend structure metric coverage and portability Eslam reda ragheb via GitGitGadget
2026-02-23 14:21   ` [PATCH v2 9/9] docs: describe repo info path keys and structure metrics Eslam reda ragheb via GitGitGadget
2026-02-23 19:43   ` [PATCH v3 0/5] repo: extend info path reporting and structure statistics eslam reda via GitGitGadget
2026-02-23 19:43     ` [PATCH v3 1/5] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-23 19:43     ` [PATCH v3 2/5] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-23 19:43     ` [PATCH v3 3/5] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-02-23 19:43     ` [PATCH v3 4/5] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-23 19:43     ` [PATCH v3 5/5] docs: describe repo info path keys Eslam reda ragheb via GitGitGadget
2026-02-26 21:14     ` [PATCH v4 00/10] repo info: add category/path keys and --path-format eslam reda via GitGitGadget
2026-02-26 21:14       ` [PATCH v4 01/10] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-26 23:21         ` Junio C Hamano
2026-02-26 21:14       ` [PATCH v4 02/10] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-26 23:29         ` Junio C Hamano
2026-02-27  9:04         ` Phillip Wood
2026-02-27 19:51           ` Junio C Hamano
2026-03-01 10:36             ` Phillip Wood
2026-03-02  6:42               ` Junio C Hamano
2026-02-26 21:14       ` [PATCH v4 03/10] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-02-26 21:14       ` [PATCH v4 04/10] repo: add structure max object size metrics Eslam reda ragheb via GitGitGadget
2026-02-26 21:14       ` [PATCH v4 05/10] repo: add structure topology and path-depth metrics Eslam reda ragheb via GitGitGadget
2026-02-26 21:14       ` [PATCH v4 06/10] repo: add aggregate structure totals to keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-26 21:14       ` [PATCH v4 07/10] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-26 21:14       ` [PATCH v4 08/10] t1901: extend structure metric coverage and portability Eslam reda ragheb via GitGitGadget
2026-02-26 21:14       ` [PATCH v4 09/10] docs: describe repo info path keys and structure metrics Eslam reda ragheb via GitGitGadget
2026-02-26 21:14       ` [PATCH v4 10/10] repo: reduce repetition in structure keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30       ` [PATCH v5 00/11] repo info: add category/path keys and --path-format eslam reda via GitGitGadget
2026-02-27 19:30         ` [PATCH v5 01/11] repo: teach info context and category keys Eslam reda ragheb via GitGitGadget
2026-02-27 21:42           ` Lucas Seiki Oshiro
2026-02-27 19:30         ` [PATCH v5 02/11] repo: add path keys to repo info Eslam reda ragheb via GitGitGadget
2026-02-27 19:30         ` [PATCH v5 03/11] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30         ` [PATCH v5 04/11] repo: add structure max object size metrics Eslam reda ragheb via GitGitGadget
2026-02-27 19:30         ` [PATCH v5 05/11] repo: add structure topology and path-depth metrics Eslam reda ragheb via GitGitGadget
2026-02-27 19:30         ` [PATCH v5 06/11] repo: add aggregate structure totals to keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30         ` [PATCH v5 07/11] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-02-27 19:30         ` Eslam reda ragheb via GitGitGadget [this message]
2026-02-27 19:30         ` [PATCH v5 09/11] docs: describe repo info path keys and structure metrics Eslam reda ragheb via GitGitGadget
2026-02-27 19:30         ` [PATCH v5 10/11] repo: reduce repetition in structure keyvalue output Eslam reda ragheb via GitGitGadget
2026-02-27 19:30         ` [PATCH v5 11/11] repo: refine path keys for repo info Eslam reda ragheb via GitGitGadget
2026-03-01 10:33           ` Phillip Wood
2026-02-27 21:52         ` [PATCH v5 00/11] repo info: add category/path keys and --path-format Lucas Seiki Oshiro
2026-03-02  5:15         ` [PATCH v6 0/6] " eslam reda via GitGitGadget
2026-03-02  5:15           ` [PATCH v6 1/6] repo: introduce repo_info context plumbing Eslam reda ragheb via GitGitGadget
2026-03-02  5:15           ` [PATCH v6 2/6] repo: support category requests in repo info Eslam reda ragheb via GitGitGadget
2026-03-02  5:15           ` [PATCH v6 3/6] repo: add path keys to " Eslam reda ragheb via GitGitGadget
2026-03-02  5:15           ` [PATCH v6 4/6] repo: add --path-format for info path output Eslam reda ragheb via GitGitGadget
2026-03-02  5:15           ` [PATCH v6 5/6] t1900: cover repo info path keys and path-format Eslam reda ragheb via GitGitGadget
2026-03-02  5:15           ` [PATCH v6 6/6] docs: describe repo info path keys Eslam reda ragheb via GitGitGadget
2026-03-18 20:44           ` [PATCH v6 0/6] repo info: add category/path keys and --path-format Jialong Wang
2026-03-19  3:36             ` K Jayatheerth
2026-03-19 20:32               ` Jerry Wang
2026-03-20  1:49                 ` K Jayatheerth
2026-03-19 20:58           ` [PATCH v6 5/6] t1900: cover repo info path keys and path-format Jialong Wang
2026-03-19 20:59           ` [PATCH] t1900: cover repo info path keys in non-default layouts Jialong Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0525ed4cd99f312b35cd7c005f16679c682352c0.1772220640.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=eslam.reda.div@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=phillip.wood123@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox