All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kousik Sanagavarapu <five231003@gmail.com>
To: git@vger.kernel.org
Cc: Kousik Sanagavarapu <five231003@gmail.com>
Subject: [PATCH] ref-filter: sort numerically when ":size" is used
Date: Fri,  1 Sep 2023 19:54:54 +0530	[thread overview]
Message-ID: <20230901142624.12063-1-five231003@gmail.com> (raw)

Atoms like "raw" and "contents" have a ":size" option which can be used
to know the size of the data. Since these atoms have the cmp_type
FIELD_STR, they are sorted alphabetically from 'a' to 'z' and '0' to
'9'. Meaning, even when the ":size" option is used and what we
ultimatlely have is numbers, we still sort alphabetically.

For example, consider the the following case in a repo

refname			contents:size		raw:size
=======			=============		========
refs/heads/branch1	1130			1210
refs/heads/master	300			410
refs/tags/v1.0		140			260

Sorting with "--format="%(refname) %(contents:size) --sort=contents:size"
would give

refs/heads/branch1 1130
refs/tags/v1.0.0 140
refs/heads/master 300

which is an alphabetic sort, while what one might really expect is

refs/tags/v1.0.0 140
refs/heads/master 300
refs/heads/branch1 1130

which is a numeric sort (that is, a "$ sort -n file" as opposed to a
"$ sort file", where "file" contains only the "contents:size" or
"raw:size" info, each of which is on a newline).

Same is the case with "--sort=raw:size".

So, sort numerically whenever the sort is done with "contents:size" or
"raw:size" and do it the normal alphabetic way when "contents" or "raw"
are used with some other option (they are FIELD_STR anyways).

Signed-off-by: Kousik Sanagavarapu <five231003@gmail.com>
---
 ref-filter.c            | 20 +++++++++++++++-----
 t/t6300-for-each-ref.sh | 15 +++++++++++++--
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 1bfaf20fbf..5d7bea5f23 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -932,7 +932,13 @@ struct atom_value {
 	ssize_t s_size;
 	int (*handler)(struct atom_value *atomv, struct ref_formatting_state *state,
 		       struct strbuf *err);
-	uintmax_t value; /* used for sorting when not FIELD_STR */
+
+	/*
+	 * Used for sorting when not FIELD_STR or when FIELD_STR but the
+	 * sort should be numeric and not alphabetic.
+	 */
+	uintmax_t value;
+
 	struct used_atom *atom;
 };
 
@@ -1857,7 +1863,8 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
 				v->s = xmemdupz(buf, buf_size);
 				v->s_size = buf_size;
 			} else if (atom->u.raw_data.option == RAW_LENGTH) {
-				v->s = xstrfmt("%"PRIuMAX, (uintmax_t)buf_size);
+				v->value = (uintmax_t)buf_size;
+				v->s = xstrfmt("%"PRIuMAX, v->value);
 			}
 			continue;
 		}
@@ -1883,8 +1890,10 @@ static void grab_sub_body_contents(struct atom_value *val, int deref, struct exp
 			v->s = strbuf_detach(&sb, NULL);
 		} else if (atom->u.contents.option == C_BODY_DEP)
 			v->s = xmemdupz(bodypos, bodylen);
-		else if (atom->u.contents.option == C_LENGTH)
-			v->s = xstrfmt("%"PRIuMAX, (uintmax_t)strlen(subpos));
+		else if (atom->u.contents.option == C_LENGTH) {
+			v->value = (uintmax_t)strlen(subpos);
+			v->s = xstrfmt("%"PRIuMAX, v->value);
+		}
 		else if (atom->u.contents.option == C_BODY)
 			v->s = xmemdupz(bodypos, nonsiglen);
 		else if (atom->u.contents.option == C_SIG)
@@ -2265,6 +2274,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
 
 		v->s_size = ATOM_SIZE_UNSPECIFIED;
 		v->handler = append_atom;
+		v->value = 0;
 		v->atom = atom;
 
 		if (*name == '*') {
@@ -2986,7 +2996,7 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru
 		cmp_detached_head = 1;
 	} else if (s->sort_flags & REF_SORTING_VERSION) {
 		cmp = versioncmp(va->s, vb->s);
-	} else if (cmp_type == FIELD_STR) {
+	} else if (cmp_type == FIELD_STR && !va->value && !vb->value) {
 		if (va->s_size < 0 && vb->s_size < 0) {
 			int (*cmp_fn)(const char *, const char *);
 			cmp_fn = s->sort_flags & REF_SORTING_ICASE
diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index aa3c7c03c4..7b943fd34c 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -1017,16 +1017,16 @@ test_expect_success 'Verify sorts with raw' '
 test_expect_success 'Verify sorts with raw:size' '
 	cat >expected <<-EOF &&
 	refs/myblobs/blob8
-	refs/myblobs/first
 	refs/myblobs/blob7
-	refs/heads/main
 	refs/myblobs/blob4
 	refs/myblobs/blob1
 	refs/myblobs/blob2
 	refs/myblobs/blob3
 	refs/myblobs/blob5
 	refs/myblobs/blob6
+	refs/myblobs/first
 	refs/mytrees/first
+	refs/heads/main
 	EOF
 	git for-each-ref --format="%(refname)" --sort=raw:size \
 		refs/heads/main refs/myblobs/ refs/mytrees/first >actual &&
@@ -1138,6 +1138,17 @@ test_expect_success 'for-each-ref --format compare with cat-file --batch' '
 	test_cmp expected actual
 '
 
+test_expect_success 'verify sorts with contents:size' '
+	cat >expect <<-\EOF &&
+	refs/heads/main
+	refs/heads/newtag
+	refs/heads/ambiguous
+	EOF
+	git for-each-ref --format="%(refname)" \
+		--sort=contents:size refs/heads/ >actual &&
+	test_cmp expect actual
+'
+
 test_expect_success 'set up multiple-sort tags' '
 	for when in 100000 200000
 	do
-- 
2.42.0.51.g5dc72c0fbc.dirty


             reply	other threads:[~2023-09-01 14:27 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-01 14:24 Kousik Sanagavarapu [this message]
2023-09-01 16:43 ` [PATCH] ref-filter: sort numerically when ":size" is used Junio C Hamano
2023-09-01 17:45   ` Jeff King
2023-09-01 17:59     ` Junio C Hamano
2023-09-01 18:32       ` Jeff King
2023-09-01 18:59         ` Kousik Sanagavarapu
2023-09-01 19:16           ` Jeff King
2023-09-01 20:21             ` Junio C Hamano
2023-09-01 20:51               ` Jeff King
2023-09-01 20:04           ` Junio C Hamano
2023-09-01 20:40             ` Jeff King
2023-09-02  9:00 ` [PATCH v2] " Kousik Sanagavarapu
2023-09-02  9:11   ` Kousik Sanagavarapu
2023-09-02 22:19   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230901142624.12063-1-five231003@gmail.com \
    --to=five231003@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.