git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [GSOC][PATCH] apply: address -Wsign-comparison warnings
@ 2025-02-05  1:40 Zejun Zhao
  2025-02-05  7:47 ` Patrick Steinhardt
                   ` (3 more replies)
  0 siblings, 4 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-05  1:40 UTC (permalink / raw)
  To: git; +Cc: Zejun Zhao, Junio C Hamano, Elijah Newren, Patrick Steinhardt

There are several -Wsign-comparison warnings in "apply.c", which can be
classified into the following three types:

  1. comparing a `length` of `size_t` type with a result of pointer
  arithmetic of `int` type

  2. comparing a `length` of `size_t` type with a `length` of `int` type

  3. comparing a loop count `i` of `int` type with an unsigned loop
  bound

Fix these warnings following one basic principle: do not touch the
relevant logics and keep the behaviors of the code. Adopt three
different strategies for each of the above three types:

  1. cast the result of pointer arithmetic to `size_t` type

  2. try to change the type of the `length` to `size_t` (may fall back
  to Strategy 1 if the variable is not guaranteed to be unsigned)

  3. use a loop count `i` of `size_t` type

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 73 +++++++++++++++++++++++++++------------------------------
 apply.h |  6 ++---
 2 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/apply.c b/apply.c
index 4a7b6120ac..0c7b89dc3a 100644
--- a/apply.c
+++ b/apply.c
@@ -8,7 +8,6 @@
  */
 
 #define USE_THE_REPOSITORY_VARIABLE
-#define DISABLE_SIGN_COMPARE_WARNINGS
 
 #include "git-compat-util.h"
 #include "abspath.h"
@@ -540,7 +539,7 @@ static size_t date_len(const char *line, size_t len)
 	    !isdigit(*p++) || !isdigit(*p++))	/* Not a date. */
 		return 0;
 
-	if (date - line >= strlen("19") &&
+	if ((size_t) (date - line) >= strlen("19") &&
 	    isdigit(date[-1]) && isdigit(date[-2]))	/* 4-digit year */
 		date -= strlen("19");
 
@@ -1087,11 +1086,11 @@ static int gitdiff_index(struct gitdiff_data *state,
 	 * and optional space with octal mode.
 	 */
 	const char *ptr, *eol;
-	int len;
-	const unsigned hexsz = the_hash_algo->hexsz;
+	size_t len;
+	const size_t hexsz = the_hash_algo->hexsz;
 
 	ptr = strchr(line, '.');
-	if (!ptr || ptr[1] != '.' || hexsz < ptr - line)
+	if (!ptr || ptr[1] != '.' || hexsz < (size_t) (ptr - line))
 		return 0;
 	len = ptr - line;
 	memcpy(patch->old_oid_prefix, line, len);
@@ -1158,7 +1157,7 @@ static const char *skip_tree_prefix(int p_value,
  */
 static char *git_header_name(int p_value,
 			     const char *line,
-			     int llen)
+			     size_t llen)
 {
 	const char *name;
 	const char *second = NULL;
@@ -1207,7 +1206,7 @@ static char *git_header_name(int p_value,
 		cp = skip_tree_prefix(p_value, second, line + llen - second);
 		if (!cp)
 			goto free_and_fail1;
-		if (line + llen - cp != first.len ||
+		if ((size_t) (line + llen - cp) != first.len ||
 		    memcmp(first.buf, cp, first.len))
 			goto free_and_fail1;
 		return strbuf_detach(&first, NULL);
@@ -1240,7 +1239,7 @@ static char *git_header_name(int p_value,
 				goto free_and_fail2;
 
 			len = sp.buf + sp.len - np;
-			if (len < second - name &&
+			if (len < (size_t) (second - name) &&
 			    !strncmp(np, name, len) &&
 			    isspace(name[len])) {
 				/* Good */
@@ -1317,7 +1316,7 @@ int parse_git_diff_header(struct strbuf *root,
 			  int *linenr,
 			  int p_value,
 			  const char *line,
-			  int len,
+			  size_t len,
 			  unsigned int size,
 			  struct patch *patch)
 {
@@ -1371,14 +1370,13 @@ int parse_git_diff_header(struct strbuf *root,
 			{ "index ", gitdiff_index },
 			{ "", gitdiff_unrecognized },
 		};
-		int i;
 
 		len = linelen(line, size);
 		if (!len || line[len-1] != '\n')
 			break;
-		for (i = 0; i < ARRAY_SIZE(optable); i++) {
+		for (size_t i = 0; i < ARRAY_SIZE(optable); i++) {
 			const struct opentry *p = optable + i;
-			int oplen = strlen(p->str);
+			size_t oplen = strlen(p->str);
 			int res;
 			if (len < oplen || memcmp(p->str, line, oplen))
 				continue;
@@ -1592,7 +1590,7 @@ static int find_header(struct apply_state *state,
 								size, patch);
 			if (git_hdr_len < 0)
 				return -128;
-			if (git_hdr_len <= len)
+			if ((size_t) git_hdr_len <= len)
 				continue;
 			*hdrsize = git_hdr_len;
 			return offset;
@@ -2097,7 +2095,6 @@ static void add_name_limit(struct apply_state *state,
 static int use_patch(struct apply_state *state, struct patch *p)
 {
 	const char *pathname = p->new_name ? p->new_name : p->old_name;
-	int i;
 
 	/* Paths outside are not touched regardless of "--include" */
 	if (state->prefix && *state->prefix) {
@@ -2107,7 +2104,7 @@ static int use_patch(struct apply_state *state, struct patch *p)
 	}
 
 	/* See if it matches any of exclude/include rule */
-	for (i = 0; i < state->limit_by_name.nr; i++) {
+	for (size_t i = 0; i < state->limit_by_name.nr; i++) {
 		struct string_list_item *it = &state->limit_by_name.items[i];
 		if (!wildmatch(it->string, pathname, 0))
 			return (it->util != NULL);
@@ -2185,7 +2182,7 @@ static int parse_chunk(struct apply_state *state, char *buffer, unsigned long si
 			};
 			int i;
 			for (i = 0; binhdr[i]; i++) {
-				int len = strlen(binhdr[i]);
+				size_t len = strlen(binhdr[i]);
 				if (len < size - hd &&
 				    !memcmp(binhdr[i], buffer + hd, len)) {
 					state->linenr++;
@@ -2238,7 +2235,8 @@ static void show_stats(struct apply_state *state, struct patch *patch)
 {
 	struct strbuf qname = STRBUF_INIT;
 	char *cp = patch->new_name ? patch->new_name : patch->old_name;
-	int max, add, del;
+	size_t max;
+	int add, del;
 
 	quote_c_style(cp, &qname, NULL, 0);
 
@@ -2257,12 +2255,12 @@ static void show_stats(struct apply_state *state, struct patch *patch)
 	}
 
 	if (patch->is_binary) {
-		printf(" %-*s |  Bin\n", max, qname.buf);
+		printf(" %-*s |  Bin\n", (int) max, qname.buf);
 		strbuf_release(&qname);
 		return;
 	}
 
-	printf(" %-*s |", max, qname.buf);
+	printf(" %-*s |", (int) max, qname.buf);
 	strbuf_release(&qname);
 
 	/*
@@ -2319,7 +2317,7 @@ static void update_pre_post_images(struct image *preimage,
 {
 	struct image fixed_preimage = IMAGE_INIT;
 	size_t insert_pos = 0;
-	int i, ctx, reduced;
+	size_t ctx, reduced;
 	const char *fixed;
 
 	/*
@@ -2328,7 +2326,7 @@ static void update_pre_post_images(struct image *preimage,
 	 * free "oldlines".
 	 */
 	image_prepare(&fixed_preimage, buf, len, 1);
-	for (i = 0; i < fixed_preimage.line_nr; i++)
+	for (size_t i = 0; i < fixed_preimage.line_nr; i++)
 		fixed_preimage.line[i].flag = preimage->line[i].flag;
 	image_clear(preimage);
 	*preimage = fixed_preimage;
@@ -2337,7 +2335,7 @@ static void update_pre_post_images(struct image *preimage,
 	/*
 	 * Adjust the common context lines in postimage.
 	 */
-	for (i = reduced = ctx = 0; i < postimage->line_nr; i++) {
+	for (size_t i = reduced = ctx = 0; i < postimage->line_nr; i++) {
 		size_t l_len = postimage->line[i].len;
 
 		if (!(postimage->line[i].flag & LINE_COMMON)) {
@@ -2417,9 +2415,9 @@ static int line_by_line_fuzzy_match(struct image *img,
 				    struct image *postimage,
 				    unsigned long current,
 				    int current_lno,
-				    int preimage_limit)
+				    size_t preimage_limit)
 {
-	int i;
+	size_t i;
 	size_t imgoff = 0;
 	size_t preoff = 0;
 	size_t extra_chars;
@@ -2486,12 +2484,12 @@ static int match_fragment(struct apply_state *state,
 			  unsigned ws_rule,
 			  int match_beginning, int match_end)
 {
-	int i;
+	size_t i;
 	const char *orig, *target;
 	struct strbuf fixed = STRBUF_INIT;
 	char *fixed_buf;
 	size_t fixed_len;
-	int preimage_limit;
+	size_t preimage_limit;
 	int ret;
 
 	if (preimage->line_nr + current_lno <= img->line_nr) {
@@ -2663,12 +2661,11 @@ static int match_fragment(struct apply_state *state,
 	for ( ; i < preimage->line_nr; i++) {
 		size_t fixstart = fixed.len; /* start of the fixed preimage */
 		size_t oldlen = preimage->line[i].len;
-		int j;
 
 		/* Try fixing the line in the preimage */
 		ws_fix_copy(&fixed, orig, oldlen, ws_rule, NULL);
 
-		for (j = fixstart; j < fixed.len; j++) {
+		for (size_t j = fixstart; j < fixed.len; j++) {
 			if (!isspace(fixed.buf[j])) {
 				ret = 0;
 				goto out;
@@ -2705,7 +2702,7 @@ static int find_pos(struct apply_state *state,
 {
 	int i;
 	unsigned long backwards, forwards, current;
-	int backwards_lno, forwards_lno, current_lno;
+	size_t backwards_lno, forwards_lno, current_lno;
 
 	/*
 	 * When running with --allow-overlap, it is possible that a hunk is
@@ -2790,7 +2787,7 @@ static int find_pos(struct apply_state *state,
  */
 static void update_image(struct apply_state *state,
 			 struct image *img,
-			 int applied_pos,
+			 size_t applied_pos,
 			 struct image *preimage,
 			 struct image *postimage)
 {
@@ -2798,11 +2795,11 @@ static void update_image(struct apply_state *state,
 	 * remove the copy of preimage at offset in img
 	 * and replace it with postimage
 	 */
-	int i, nr;
+	int nr;
 	size_t remove_count, insert_count, applied_at = 0;
 	size_t result_alloc;
 	char *result;
-	int preimage_limit;
+	size_t preimage_limit;
 
 	/*
 	 * If we are removing blank lines at the end of img,
@@ -2817,11 +2814,11 @@ static void update_image(struct apply_state *state,
 	if (preimage_limit > img->line_nr - applied_pos)
 		preimage_limit = img->line_nr - applied_pos;
 
-	for (i = 0; i < applied_pos; i++)
+	for (size_t i = 0; i < applied_pos; i++)
 		applied_at += img->line[i].len;
 
 	remove_count = 0;
-	for (i = 0; i < preimage_limit; i++)
+	for (size_t i = 0; i < preimage_limit; i++)
 		remove_count += img->line[applied_pos + i].len;
 	insert_count = postimage->buf.len;
 
@@ -2850,7 +2847,7 @@ static void update_image(struct apply_state *state,
 			   img->line_nr - (applied_pos + preimage_limit));
 	COPY_ARRAY(img->line + applied_pos, postimage->line, postimage->line_nr);
 	if (!state->allow_overlap)
-		for (i = 0; i < postimage->line_nr; i++)
+		for (size_t i = 0; i < postimage->line_nr; i++)
 			img->line[applied_pos + i].flag |= LINE_PATCHED;
 	img->line_nr = nr;
 }
@@ -4287,19 +4284,19 @@ static void summary_patch_list(struct patch *patch)
 
 static void patch_stats(struct apply_state *state, struct patch *patch)
 {
-	int lines = patch->lines_added + patch->lines_deleted;
+	unsigned lines = patch->lines_added + patch->lines_deleted;
 
 	if (lines > state->max_change)
 		state->max_change = lines;
 	if (patch->old_name) {
-		int len = quote_c_style(patch->old_name, NULL, NULL, 0);
+		size_t len = quote_c_style(patch->old_name, NULL, NULL, 0);
 		if (!len)
 			len = strlen(patch->old_name);
 		if (len > state->max_len)
 			state->max_len = len;
 	}
 	if (patch->new_name) {
-		int len = quote_c_style(patch->new_name, NULL, NULL, 0);
+		size_t len = quote_c_style(patch->new_name, NULL, NULL, 0);
 		if (!len)
 			len = strlen(patch->new_name);
 		if (len > state->max_len)
diff --git a/apply.h b/apply.h
index 90e887ec0e..f53fbb1777 100644
--- a/apply.h
+++ b/apply.h
@@ -90,8 +90,8 @@ struct apply_state {
 	 * we've seen, and the longest filename. That allows us to do simple
 	 * scaling.
 	 */
-	int max_change;
-	int max_len;
+	unsigned max_change;
+	size_t max_len;
 
 	/*
 	 * Records filenames that have been touched, in order to handle
@@ -170,7 +170,7 @@ int parse_git_diff_header(struct strbuf *root,
 			  int *linenr,
 			  int p_value,
 			  const char *line,
-			  int len,
+			  size_t len,
 			  unsigned int size,
 			  struct patch *patch);
 

base-commit: bc204b742735ae06f65bb20291c95985c9633b7f
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-05  1:40 [GSOC][PATCH] apply: address -Wsign-comparison warnings Zejun Zhao
@ 2025-02-05  7:47 ` Patrick Steinhardt
  2025-02-05 15:42   ` Zejun Zhao
  2025-02-05 12:58 ` Junio C Hamano
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 31+ messages in thread
From: Patrick Steinhardt @ 2025-02-05  7:47 UTC (permalink / raw)
  To: Zejun Zhao; +Cc: git, Junio C Hamano, Elijah Newren

On Wed, Feb 05, 2025 at 01:40:55AM +0000, Zejun Zhao wrote:
> There are several -Wsign-comparison warnings in "apply.c", which can be
> classified into the following three types:
> 
>   1. comparing a `length` of `size_t` type with a result of pointer
>   arithmetic of `int` type
> 
>   2. comparing a `length` of `size_t` type with a `length` of `int` type
> 
>   3. comparing a loop count `i` of `int` type with an unsigned loop
>   bound
> 
> Fix these warnings following one basic principle: do not touch the
> relevant logics and keep the behaviors of the code. Adopt three
> different strategies for each of the above three types:
> 
>   1. cast the result of pointer arithmetic to `size_t` type
> 
>   2. try to change the type of the `length` to `size_t` (may fall back
>   to Strategy 1 if the variable is not guaranteed to be unsigned)
> 
>   3. use a loop count `i` of `size_t` type

You should split up this patch into a series, as it is really hard to
follow what's going on. There are a couple of things happening:

  - You change types in `struct apply_state`, which bubbles up.

  - You adapt `git_hdr_len()` to receive different inputs, which bubbles
    up.

  - You perform small fixes in several places.

It might also be a good idea to split out the loop counters into a
separate commit, as those are trivially correct.

> Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
> ---
>  apply.c | 73 +++++++++++++++++++++++++++------------------------------
>  apply.h |  6 ++---
>  2 files changed, 38 insertions(+), 41 deletions(-)
> 
> diff --git a/apply.c b/apply.c
> index 4a7b6120ac..0c7b89dc3a 100644
> --- a/apply.c
> +++ b/apply.c
> @@ -8,7 +8,6 @@
>   */
>  
>  #define USE_THE_REPOSITORY_VARIABLE
> -#define DISABLE_SIGN_COMPARE_WARNINGS
>  
>  #include "git-compat-util.h"
>  #include "abspath.h"
> @@ -540,7 +539,7 @@ static size_t date_len(const char *line, size_t len)
>  	    !isdigit(*p++) || !isdigit(*p++))	/* Not a date. */
>  		return 0;
>  
> -	if (date - line >= strlen("19") &&
> +	if ((size_t) (date - line) >= strlen("19") &&

We know that `date` is always bigger than or equal to `line`, so this is
correct.

> @@ -1087,11 +1086,11 @@ static int gitdiff_index(struct gitdiff_data *state,
>  	 * and optional space with octal mode.
>  	 */
>  	const char *ptr, *eol;
> -	int len;
> -	const unsigned hexsz = the_hash_algo->hexsz;
> +	size_t len;
> +	const size_t hexsz = the_hash_algo->hexsz;

The change to `hexsz` shouldn't be needed, even if it makes us match the
type of `hexsz` as declared in `git_hash_algo`.

>  	ptr = strchr(line, '.');
> -	if (!ptr || ptr[1] != '.' || hexsz < ptr - line)
> +	if (!ptr || ptr[1] != '.' || hexsz < (size_t) (ptr - line))

`ptr` is the reline of `strrchr(line)`, so it must be either `NULL` or
greater than or equal to `line`, so this cast is fine.

> @@ -1158,7 +1157,7 @@ static const char *skip_tree_prefix(int p_value,
>   */
>  static char *git_header_name(int p_value,
>  			     const char *line,
> -			     int llen)
> +			     size_t llen)
>  {
>  	const char *name;
>  	const char *second = NULL;

It would make sense to split this change out into a separate commit, as
it bubbles up into calling functions, as well.

> @@ -1207,7 +1206,7 @@ static char *git_header_name(int p_value,
>  		cp = skip_tree_prefix(p_value, second, line + llen - second);
>  		if (!cp)
>  			goto free_and_fail1;
> -		if (line + llen - cp != first.len ||
> +		if ((size_t) (line + llen - cp) != first.len ||
>  		    memcmp(first.buf, cp, first.len))
>  			goto free_and_fail1;
>  		return strbuf_detach(&first, NULL);

This cast should be fine, too.

> @@ -1240,7 +1239,7 @@ static char *git_header_name(int p_value,
>  				goto free_and_fail2;
>  
>  			len = sp.buf + sp.len - np;
> -			if (len < second - name &&
> +			if (len < (size_t) (second - name) &&
>  			    !strncmp(np, name, len) &&
>  			    isspace(name[len])) {
>  				/* Good */

This one, too. `second` is iterating through `name`, so it's always
greater or equal to `name`.

> @@ -1371,14 +1370,13 @@ int parse_git_diff_header(struct strbuf *root,
>  			{ "index ", gitdiff_index },
>  			{ "", gitdiff_unrecognized },
>  		};
> -		int i;
>  
>  		len = linelen(line, size);
>  		if (!len || line[len-1] != '\n')
>  			break;
> -		for (i = 0; i < ARRAY_SIZE(optable); i++) {
> +		for (size_t i = 0; i < ARRAY_SIZE(optable); i++) {
>  			const struct opentry *p = optable + i;
> -			int oplen = strlen(p->str);
> +			size_t oplen = strlen(p->str);
>  			int res;
>  			if (len < oplen || memcmp(p->str, line, oplen))
>  				continue;

Makes sense.

> @@ -1592,7 +1590,7 @@ static int find_header(struct apply_state *state,
>  								size, patch);
>  			if (git_hdr_len < 0)
>  				return -128;
> -			if (git_hdr_len <= len)
> +			if ((size_t) git_hdr_len <= len)
>  				continue;
>  			*hdrsize = git_hdr_len;
>  			return offset;

We've asserted that `git_hdr_len` isn't negative, so the cast is fine.

> @@ -2185,7 +2182,7 @@ static int parse_chunk(struct apply_state *state, char *buffer, unsigned long si
>  			};
>  			int i;

This may arguably be `size_t`, as well.

> @@ -2257,12 +2255,12 @@ static void show_stats(struct apply_state *state, struct patch *patch)
>  	}
>  
>  	if (patch->is_binary) {
> -		printf(" %-*s |  Bin\n", max, qname.buf);
> +		printf(" %-*s |  Bin\n", (int) max, qname.buf);
>  		strbuf_release(&qname);
>  		return;
>  	}
>  
> -	printf(" %-*s |", max, qname.buf);
> +	printf(" %-*s |", (int) max, qname.buf);
>  	strbuf_release(&qname);
>  
>  	/*

Do we _know_ that `max` fits into an `int`?

Patrick

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-05  1:40 [GSOC][PATCH] apply: address -Wsign-comparison warnings Zejun Zhao
  2025-02-05  7:47 ` Patrick Steinhardt
@ 2025-02-05 12:58 ` Junio C Hamano
  2025-02-05 16:49   ` Zejun Zhao
  2025-02-09  8:12 ` [GSOC][PATCH v2 0/6] " Zejun Zhao
  2025-02-16  7:28 ` [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings Zejun Zhao
  3 siblings, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2025-02-05 12:58 UTC (permalink / raw)
  To: Zejun Zhao; +Cc: git, Elijah Newren, Patrick Steinhardt


> @@ -1087,11 +1086,11 @@ static int gitdiff_index(struct gitdiff_data *state,
>  	 * and optional space with octal mode.
>  	 */
>  	const char *ptr, *eol;
> -	int len;
> -	const unsigned hexsz = the_hash_algo->hexsz;
> +	size_t len;
> +	const size_t hexsz = the_hash_algo->hexsz;

I thought that I already saw this discussed in another thread.

The .hexsz of any hash algorithm would never be larger than what a
platform natural "unsigned" integer type can hold, so using size_t
for the member _is_ the wrong thing to do and the fix may be the
other way around, no?

There are genuinely good changes (correcting assigned variable from
int to size_t when the value ultimately came from a system function
that returns size_t) in this patch, but there are other annoying
ones like these, so I am not sure.

>  	ptr = strchr(line, '.');
> -	if (!ptr || ptr[1] != '.' || hexsz < ptr - line)
> +	if (!ptr || ptr[1] != '.' || hexsz < (size_t) (ptr - line))

Is this about -Wsign-compare complaining about size_t vs ptrdiff_t?

> @@ -1207,7 +1206,7 @@ static char *git_header_name(int p_value,
>  		cp = skip_tree_prefix(p_value, second, line + llen - second);
>  		if (!cp)
>  			goto free_and_fail1;
> -		if (line + llen - cp != first.len ||
> +		if ((size_t) (line + llen - cp) != first.len ||

Ditto.

> -			if (len < second - name &&
> +			if (len < (size_t) (second - name) &&

Ditto.

I said this before, but I am not sure if being strict about
"-Wsign-compare" is really buying us much.  If we are getting so
many false positives that need to be squelched by churning the code
with so many typecasts in order to find a new and real problem,
is it really worth it?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-05  7:47 ` Patrick Steinhardt
@ 2025-02-05 15:42   ` Zejun Zhao
  0 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-05 15:42 UTC (permalink / raw)
  To: ps; +Cc: git, gitster, jelly.zhao.42, newren

Thank you for reviewing this patch.

On Wed, Feb 05, 2025 08:47:06 +0100, Patrick Steinhardt wrote:
> You should split up this patch into a series, as it is really hard to
> follow what's going on. There are a couple of things happening:
> 
>   - You change types in `struct apply_state`, which bubbles up.
> 
>   - You adapt `git_hdr_len()` to receive different inputs, which bubbles
>     up.
> 
>   - You perform small fixes in several places.
> 
> It might also be a good idea to split out the loop counters into a
> separate commit, as those are trivially correct.

Sure I'll come up with a v2 patch series, in which each kind of fixes 
will be put in a single commit and I'll state why I believe the type 
cast/change is safe for every single fix in the commit message.

> > @@ -1087,11 +1086,11 @@ static int gitdiff_index(struct gitdiff_data *state,
> >  	 * and optional space with octal mode.
> >  	 */
> >  	const char *ptr, *eol;
> > -	int len;
> > -	const unsigned hexsz = the_hash_algo->hexsz;
> > +	size_t len;
> > +	const size_t hexsz = the_hash_algo->hexsz;
> 
> The change to `hexsz` shouldn't be needed, even if it makes us match the
> type of `hexsz` as declared in `git_hash_algo`.

Yes it's not necessary here to change the type. And for the `hexsz` stuff,
on Wed, Feb 05 2025 04:58:57 -0800, Junio C Hamano wrote,
> I thought that I already saw this discussed in another thread.
> 
> The .hexsz of any hash algorithm would never be larger than what a
> platform natural "unsigned" integer type can hold, so using size_t
> for the member _is_ the wrong thing to do and the fix may be the
> other way around, no?

I found the discussion mentioned at [1]. It seems like the change here 
only makes things worse so I'll see if I'd leave it untouched or change 
the type of `.hexsz` member to `int` or something.

[1] https://lore.kernel.org/git/xmqqttaqw2eb.fsf@gitster.g/

> > @@ -2185,7 +2182,7 @@ static int parse_chunk(struct apply_state *state, char *buffer, unsigned long si
> >  			};
> >  			int i;
> 
> This may arguably be `size_t`, as well.

It's OK for me to use a `size_t` loop count everywhere but I tried to 
keep the changes in this patch minimal (forget about the `hexsz` thing).
I could apply this change if you insist.

> > @@ -2257,12 +2255,12 @@ static void show_stats(struct apply_state *state, struct patch *patch)
> >  	}
> >  
> >  	if (patch->is_binary) {
> > -		printf(" %-*s |  Bin\n", max, qname.buf);
> > +		printf(" %-*s |  Bin\n", (int) max, qname.buf);
> >  		strbuf_release(&qname);
> >  		return;
> >  	}
> >  
> > -	printf(" %-*s |", max, qname.buf);
> > +	printf(" %-*s |", (int) max, qname.buf);
> >  	strbuf_release(&qname);
> >  
> >  	/*
> 
> Do we _know_ that `max` fits into an `int`?

Yep we've set an upper bound for `max` before:
> /*
>  * "scale" the filename
>  */
> max = state->max_len;
> if (max > 50)
> 	      max = 50;
so it must fit into an `int`.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-05 12:58 ` Junio C Hamano
@ 2025-02-05 16:49   ` Zejun Zhao
  0 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-05 16:49 UTC (permalink / raw)
  To: gitster; +Cc: git, jelly.zhao.42, newren, ps

Thank you for reviewing this patch.

On Wed, Feb 05 2025 04:58:57 -0800, Junio C Hamano wrote,
> > @@ -1087,11 +1086,11 @@ static int gitdiff_index(struct gitdiff_data *state,
> >  	 * and optional space with octal mode.
> >  	 */
> >  	const char *ptr, *eol;
> > -	int len;
> > -	const unsigned hexsz = the_hash_algo->hexsz;
> > +	size_t len;
> > +	const size_t hexsz = the_hash_algo->hexsz;
> 
> I thought that I already saw this discussed in another thread.
> 
> The .hexsz of any hash algorithm would never be larger than what a
> platform natural "unsigned" integer type can hold, so using size_t
> for the member _is_ the wrong thing to do and the fix may be the
> other way around, no?

Sorry I didn't saw the comment at [1]. You are right about this. I 
prefer changing types of the `git_hash_algo::*sz` family to `unsigned`.

[1] https://lore.kernel.org/git/xmqqttaqw2eb.fsf@gitster.g/


> >  	ptr = strchr(line, '.');
> > -	if (!ptr || ptr[1] != '.' || hexsz < ptr - line)
> > +	if (!ptr || ptr[1] != '.' || hexsz < (size_t) (ptr - line))
> 
> Is this about -Wsign-compare complaining about size_t vs ptrdiff_t?
> 
> > @@ -1207,7 +1206,7 @@ static char *git_header_name(int p_value,
> >  		cp = skip_tree_prefix(p_value, second, line + llen - second);
> >  		if (!cp)
> >  			goto free_and_fail1;
> > -		if (line + llen - cp != first.len ||
> > +		if ((size_t) (line + llen - cp) != first.len ||
> 
> Ditto.
> 
> > -			if (len < second - name &&
> > +			if (len < (size_t) (second - name) &&
> 
> Ditto.

Yes it's comparing unsigned (`size_t`) with signed (`ptrdiff_t`).

> I said this before, but I am not sure if being strict about
> "-Wsign-compare" is really buying us much.  If we are getting so
> many false positives that need to be squelched by churning the code
> with so many typecasts in order to find a new and real problem,
> is it really worth it?

Well, I contributed most before to a Rust OS kernel project so 
honestly I'm used to the idea of explicit type casting when necessary 
to avoid possible bugs. However, this is non-trivial. People should 
understand each relevant variable's semantics to decide what its type 
should be and do typecast on top of that. Even if it's already made a 
gradual process, it may still cost much from both the contributors and 
the reviewers but brings benefits that do not match up. Maybe a clear 
commit message that states why each type cast/change makes sense can 
help with the iteration? I'm not quite sure about that.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [GSOC][PATCH v2 0/6] apply: address -Wsign-comparison warnings
  2025-02-05  1:40 [GSOC][PATCH] apply: address -Wsign-comparison warnings Zejun Zhao
  2025-02-05  7:47 ` Patrick Steinhardt
  2025-02-05 12:58 ` Junio C Hamano
@ 2025-02-09  8:12 ` Zejun Zhao
  2025-02-09  8:12   ` [GSOC][PATCH v2 1/6] apply: change fields in `apply_state` to unsigned Zejun Zhao
                     ` (5 more replies)
  2025-02-16  7:28 ` [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings Zejun Zhao
  3 siblings, 6 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-09  8:12 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps

There are several -Wsign-comparison warnings in "apply.c", which can be
classified into the following three types:

  1. comparing a length of `size_t` type with a `ptrdiff_t` value

  2. comparing a length of `size_t` type with a length of `int` type

  3. comparing a loop counter `i` of `int` type with an unsigned loop
  bound

Fix these warnings following one basic principle: do not touch the
relevant logics and keep the behaviors of the code. Adopt three
different strategies for each of the above three types:

  1. cast the `ptrdiff_t` values to `size_t` type

  2. try to change the type of the `int` length to `size_t` (may fall 
  back to Strategy 1 if the variable is not guaranteed to be unsigned)

  3. use a loop counter `i` of `size_t` type

Zejun Zhao (6):
  apply: change fields in `apply_state` to unsigned
  apply: change some variables from `int` to `size_t`
  apply: do a typecast to eliminate warnings
  apply: cast some ptrdiff_t's to size_t's
  apply: use `size_t` loop counters
  apply: enable -Wsign-comparison checks

 apply.c | 69 +++++++++++++++++++++++++++------------------------------
 apply.h |  4 ++--
 2 files changed, 35 insertions(+), 38 deletions(-)


base-commit: bc204b742735ae06f65bb20291c95985c9633b7f
-- 
2.43.0


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [GSOC][PATCH v2 1/6] apply: change fields in `apply_state` to unsigned
  2025-02-09  8:12 ` [GSOC][PATCH v2 0/6] " Zejun Zhao
@ 2025-02-09  8:12   ` Zejun Zhao
  2025-02-13  9:51     ` Karthik Nayak
  2025-02-09  8:12   ` [GSOC][PATCH v2 2/6] apply: change some variables from `int` to `size_t` Zejun Zhao
                     ` (4 subsequent siblings)
  5 siblings, 1 reply; 31+ messages in thread
From: Zejun Zhao @ 2025-02-09  8:12 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps

`.max_change` and `.max_len` of `apply_state` are only used as unsigned
integers. Misuse of `int` type would cause -Wsign-comparison warnings.

Fix this by

  - change `.max_change`'s type to `unsigned` since it's just a counter

  - change `.max_len`'s type to `size_t` since it's a length

  - change the types of relevant variables in function `show_stats`

Note that `printf`'s format string requires us to do some typecast
(from `size_t` to `int`) on `max` in function `show_stats`. This is
safe because we already set a upper bound of `50` for `max` before the
cast.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 9 +++++----
 apply.h | 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/apply.c b/apply.c
index 4a7b6120ac..831b338155 100644
--- a/apply.c
+++ b/apply.c
@@ -2238,7 +2238,8 @@ static void show_stats(struct apply_state *state, struct patch *patch)
 {
 	struct strbuf qname = STRBUF_INIT;
 	char *cp = patch->new_name ? patch->new_name : patch->old_name;
-	int max, add, del;
+	size_t max;
+	unsigned add, del;
 
 	quote_c_style(cp, &qname, NULL, 0);
 
@@ -2257,12 +2258,12 @@ static void show_stats(struct apply_state *state, struct patch *patch)
 	}
 
 	if (patch->is_binary) {
-		printf(" %-*s |  Bin\n", max, qname.buf);
+		printf(" %-*s |  Bin\n", (int) max, qname.buf);
 		strbuf_release(&qname);
 		return;
 	}
 
-	printf(" %-*s |", max, qname.buf);
+	printf(" %-*s |", (int) max, qname.buf);
 	strbuf_release(&qname);
 
 	/*
@@ -2273,7 +2274,7 @@ static void show_stats(struct apply_state *state, struct patch *patch)
 	del = patch->lines_deleted;
 
 	if (state->max_change > 0) {
-		int total = ((add + del) * max + state->max_change / 2) / state->max_change;
+		unsigned total = ((add + del) * max + state->max_change / 2) / state->max_change;
 		add = (add * max + state->max_change / 2) / state->max_change;
 		del = total - add;
 	}
diff --git a/apply.h b/apply.h
index 90e887ec0e..f7f369d44f 100644
--- a/apply.h
+++ b/apply.h
@@ -90,8 +90,8 @@ struct apply_state {
 	 * we've seen, and the longest filename. That allows us to do simple
 	 * scaling.
 	 */
-	int max_change;
-	int max_len;
+	unsigned max_change;
+	size_t max_len;
 
 	/*
 	 * Records filenames that have been touched, in order to handle
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [GSOC][PATCH v2 2/6] apply: change some variables from `int` to `size_t`
  2025-02-09  8:12 ` [GSOC][PATCH v2 0/6] " Zejun Zhao
  2025-02-09  8:12   ` [GSOC][PATCH v2 1/6] apply: change fields in `apply_state` to unsigned Zejun Zhao
@ 2025-02-09  8:12   ` Zejun Zhao
  2025-02-13  6:08     ` Patrick Steinhardt
  2025-02-09  8:12   ` [GSOC][PATCH v2 3/6] apply: do a typecast to eliminate warnings Zejun Zhao
                     ` (3 subsequent siblings)
  5 siblings, 1 reply; 31+ messages in thread
From: Zejun Zhao @ 2025-02-09  8:12 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps

Some assigned variables are mistyped as `int`, including

  - those whose values come from a system function returning `size_t`,

  - those that are used for array indexing,

  - those that represent length/size/distance,

some of which will trigger -Wsign-comparison warnings.

Change some of them to `size_t`/`unsigned`.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/apply.c b/apply.c
index 831b338155..b4ae74a5fb 100644
--- a/apply.c
+++ b/apply.c
@@ -1087,7 +1087,7 @@ static int gitdiff_index(struct gitdiff_data *state,
 	 * and optional space with octal mode.
 	 */
 	const char *ptr, *eol;
-	int len;
+	size_t len;
 	const unsigned hexsz = the_hash_algo->hexsz;
 
 	ptr = strchr(line, '.');
@@ -2185,7 +2185,7 @@ static int parse_chunk(struct apply_state *state, char *buffer, unsigned long si
 			};
 			int i;
 			for (i = 0; binhdr[i]; i++) {
-				int len = strlen(binhdr[i]);
+				size_t len = strlen(binhdr[i]);
 				if (len < size - hd &&
 				    !memcmp(binhdr[i], buffer + hd, len)) {
 					state->linenr++;
@@ -2320,7 +2320,8 @@ static void update_pre_post_images(struct image *preimage,
 {
 	struct image fixed_preimage = IMAGE_INIT;
 	size_t insert_pos = 0;
-	int i, ctx, reduced;
+	int i, reduced;
+	size_t ctx;
 	const char *fixed;
 
 	/*
@@ -2492,7 +2493,7 @@ static int match_fragment(struct apply_state *state,
 	struct strbuf fixed = STRBUF_INIT;
 	char *fixed_buf;
 	size_t fixed_len;
-	int preimage_limit;
+	size_t preimage_limit;
 	int ret;
 
 	if (preimage->line_nr + current_lno <= img->line_nr) {
@@ -2706,7 +2707,7 @@ static int find_pos(struct apply_state *state,
 {
 	int i;
 	unsigned long backwards, forwards, current;
-	int backwards_lno, forwards_lno, current_lno;
+	size_t backwards_lno, forwards_lno, current_lno;
 
 	/*
 	 * When running with --allow-overlap, it is possible that a hunk is
@@ -2791,7 +2792,7 @@ static int find_pos(struct apply_state *state,
  */
 static void update_image(struct apply_state *state,
 			 struct image *img,
-			 int applied_pos,
+			 size_t applied_pos,
 			 struct image *preimage,
 			 struct image *postimage)
 {
@@ -2803,7 +2804,7 @@ static void update_image(struct apply_state *state,
 	size_t remove_count, insert_count, applied_at = 0;
 	size_t result_alloc;
 	char *result;
-	int preimage_limit;
+	size_t preimage_limit;
 
 	/*
 	 * If we are removing blank lines at the end of img,
@@ -4288,19 +4289,19 @@ static void summary_patch_list(struct patch *patch)
 
 static void patch_stats(struct apply_state *state, struct patch *patch)
 {
-	int lines = patch->lines_added + patch->lines_deleted;
+	unsigned lines = patch->lines_added + patch->lines_deleted;
 
 	if (lines > state->max_change)
 		state->max_change = lines;
 	if (patch->old_name) {
-		int len = quote_c_style(patch->old_name, NULL, NULL, 0);
+		size_t len = quote_c_style(patch->old_name, NULL, NULL, 0);
 		if (!len)
 			len = strlen(patch->old_name);
 		if (len > state->max_len)
 			state->max_len = len;
 	}
 	if (patch->new_name) {
-		int len = quote_c_style(patch->new_name, NULL, NULL, 0);
+		size_t len = quote_c_style(patch->new_name, NULL, NULL, 0);
 		if (!len)
 			len = strlen(patch->new_name);
 		if (len > state->max_len)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [GSOC][PATCH v2 3/6] apply: do a typecast to eliminate warnings
  2025-02-09  8:12 ` [GSOC][PATCH v2 0/6] " Zejun Zhao
  2025-02-09  8:12   ` [GSOC][PATCH v2 1/6] apply: change fields in `apply_state` to unsigned Zejun Zhao
  2025-02-09  8:12   ` [GSOC][PATCH v2 2/6] apply: change some variables from `int` to `size_t` Zejun Zhao
@ 2025-02-09  8:12   ` Zejun Zhao
  2025-02-09  8:12   ` [GSOC][PATCH v2 4/6] apply: cast some ptrdiff_t's to size_t's Zejun Zhao
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-09  8:12 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps

`git_hdr_len` is an `int` variable that can be negative and is used to
compare against a `len` of `size_t`, which will trigger
-Wsign-comparison warnings

Cast `git_hdr_len` to `size_t` after an above-zero check.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apply.c b/apply.c
index b4ae74a5fb..605a0aa2e3 100644
--- a/apply.c
+++ b/apply.c
@@ -1592,7 +1592,7 @@ static int find_header(struct apply_state *state,
 								size, patch);
 			if (git_hdr_len < 0)
 				return -128;
-			if (git_hdr_len <= len)
+			if ((size_t) git_hdr_len <= len)
 				continue;
 			*hdrsize = git_hdr_len;
 			return offset;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [GSOC][PATCH v2 4/6] apply: cast some ptrdiff_t's to size_t's
  2025-02-09  8:12 ` [GSOC][PATCH v2 0/6] " Zejun Zhao
                     ` (2 preceding siblings ...)
  2025-02-09  8:12   ` [GSOC][PATCH v2 3/6] apply: do a typecast to eliminate warnings Zejun Zhao
@ 2025-02-09  8:12   ` Zejun Zhao
  2025-02-09  8:12   ` [GSOC][PATCH v2 5/6] apply: use `size_t` loop counters Zejun Zhao
  2025-02-09  8:12   ` [GSOC][PATCH v2 6/6] apply: enable -Wsign-comparison checks Zejun Zhao
  5 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-09  8:12 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps

There are several -Wsign-comparison warnings in "apply.c", complaining
about us comparing ptrdiff_t's with size_t's.

Fix these warnings by typecasting from ptrdiff_t to size_t. As to why
the casts is safe,

  - in function `date_len`, `date` is the starting address of a date at
  the end of the `line` and is guaranteed to be larger than (or equal
  to) `line`

  - in function `git_header_name`, `cp` is guaranteed to be larger than
  (or equal to) `second`, so `line + len` is greater than (or equal to)
  `cp` since we already treat `line + len - second` as a size_t

  - in function `git_header_name`, we are iterating `name` using
  `second`, so `second` is guaranteed to be greater than (or equal to)
  `name`

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/apply.c b/apply.c
index 605a0aa2e3..72464fb6c2 100644
--- a/apply.c
+++ b/apply.c
@@ -540,7 +540,7 @@ static size_t date_len(const char *line, size_t len)
 	    !isdigit(*p++) || !isdigit(*p++))	/* Not a date. */
 		return 0;
 
-	if (date - line >= strlen("19") &&
+	if ((size_t) (date - line) >= strlen("19") &&
 	    isdigit(date[-1]) && isdigit(date[-2]))	/* 4-digit year */
 		date -= strlen("19");
 
@@ -1207,7 +1207,7 @@ static char *git_header_name(int p_value,
 		cp = skip_tree_prefix(p_value, second, line + llen - second);
 		if (!cp)
 			goto free_and_fail1;
-		if (line + llen - cp != first.len ||
+		if ((size_t) (line + llen - cp) != first.len ||
 		    memcmp(first.buf, cp, first.len))
 			goto free_and_fail1;
 		return strbuf_detach(&first, NULL);
@@ -1240,7 +1240,7 @@ static char *git_header_name(int p_value,
 				goto free_and_fail2;
 
 			len = sp.buf + sp.len - np;
-			if (len < second - name &&
+			if (len < (size_t) (second - name) &&
 			    !strncmp(np, name, len) &&
 			    isspace(name[len])) {
 				/* Good */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [GSOC][PATCH v2 5/6] apply: use `size_t` loop counters
  2025-02-09  8:12 ` [GSOC][PATCH v2 0/6] " Zejun Zhao
                     ` (3 preceding siblings ...)
  2025-02-09  8:12   ` [GSOC][PATCH v2 4/6] apply: cast some ptrdiff_t's to size_t's Zejun Zhao
@ 2025-02-09  8:12   ` Zejun Zhao
  2025-02-13  6:08     ` Patrick Steinhardt
  2025-02-09  8:12   ` [GSOC][PATCH v2 6/6] apply: enable -Wsign-comparison checks Zejun Zhao
  5 siblings, 1 reply; 31+ messages in thread
From: Zejun Zhao @ 2025-02-09  8:12 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps

Some `int` loop counters trigger -Wsign-comparison warnings.

Use `size_t` loop counters.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/apply.c b/apply.c
index 72464fb6c2..585f534732 100644
--- a/apply.c
+++ b/apply.c
@@ -1371,12 +1371,11 @@ int parse_git_diff_header(struct strbuf *root,
 			{ "index ", gitdiff_index },
 			{ "", gitdiff_unrecognized },
 		};
-		int i;
 
 		len = linelen(line, size);
 		if (!len || line[len-1] != '\n')
 			break;
-		for (i = 0; i < ARRAY_SIZE(optable); i++) {
+		for (size_t i = 0; i < ARRAY_SIZE(optable); i++) {
 			const struct opentry *p = optable + i;
 			int oplen = strlen(p->str);
 			int res;
@@ -2097,7 +2096,6 @@ static void add_name_limit(struct apply_state *state,
 static int use_patch(struct apply_state *state, struct patch *p)
 {
 	const char *pathname = p->new_name ? p->new_name : p->old_name;
-	int i;
 
 	/* Paths outside are not touched regardless of "--include" */
 	if (state->prefix && *state->prefix) {
@@ -2107,7 +2105,7 @@ static int use_patch(struct apply_state *state, struct patch *p)
 	}
 
 	/* See if it matches any of exclude/include rule */
-	for (i = 0; i < state->limit_by_name.nr; i++) {
+	for (size_t i = 0; i < state->limit_by_name.nr; i++) {
 		struct string_list_item *it = &state->limit_by_name.items[i];
 		if (!wildmatch(it->string, pathname, 0))
 			return (it->util != NULL);
@@ -2183,8 +2181,7 @@ static int parse_chunk(struct apply_state *state, char *buffer, unsigned long si
 				"Files ",
 				NULL,
 			};
-			int i;
-			for (i = 0; binhdr[i]; i++) {
+			for (size_t i = 0; binhdr[i]; i++) {
 				size_t len = strlen(binhdr[i]);
 				if (len < size - hd &&
 				    !memcmp(binhdr[i], buffer + hd, len)) {
@@ -2320,7 +2317,7 @@ static void update_pre_post_images(struct image *preimage,
 {
 	struct image fixed_preimage = IMAGE_INIT;
 	size_t insert_pos = 0;
-	int i, reduced;
+	int reduced;
 	size_t ctx;
 	const char *fixed;
 
@@ -2330,7 +2327,7 @@ static void update_pre_post_images(struct image *preimage,
 	 * free "oldlines".
 	 */
 	image_prepare(&fixed_preimage, buf, len, 1);
-	for (i = 0; i < fixed_preimage.line_nr; i++)
+	for (size_t i = 0; i < fixed_preimage.line_nr; i++)
 		fixed_preimage.line[i].flag = preimage->line[i].flag;
 	image_clear(preimage);
 	*preimage = fixed_preimage;
@@ -2339,7 +2336,7 @@ static void update_pre_post_images(struct image *preimage,
 	/*
 	 * Adjust the common context lines in postimage.
 	 */
-	for (i = reduced = ctx = 0; i < postimage->line_nr; i++) {
+	for (size_t i = reduced = ctx = 0; i < postimage->line_nr; i++) {
 		size_t l_len = postimage->line[i].len;
 
 		if (!(postimage->line[i].flag & LINE_COMMON)) {
@@ -2419,9 +2416,9 @@ static int line_by_line_fuzzy_match(struct image *img,
 				    struct image *postimage,
 				    unsigned long current,
 				    int current_lno,
-				    int preimage_limit)
+				    size_t preimage_limit)
 {
-	int i;
+	size_t i;
 	size_t imgoff = 0;
 	size_t preoff = 0;
 	size_t extra_chars;
@@ -2488,7 +2485,7 @@ static int match_fragment(struct apply_state *state,
 			  unsigned ws_rule,
 			  int match_beginning, int match_end)
 {
-	int i;
+	size_t i;
 	const char *orig, *target;
 	struct strbuf fixed = STRBUF_INIT;
 	char *fixed_buf;
@@ -2665,12 +2662,11 @@ static int match_fragment(struct apply_state *state,
 	for ( ; i < preimage->line_nr; i++) {
 		size_t fixstart = fixed.len; /* start of the fixed preimage */
 		size_t oldlen = preimage->line[i].len;
-		int j;
 
 		/* Try fixing the line in the preimage */
 		ws_fix_copy(&fixed, orig, oldlen, ws_rule, NULL);
 
-		for (j = fixstart; j < fixed.len; j++) {
+		for (size_t j = fixstart; j < fixed.len; j++) {
 			if (!isspace(fixed.buf[j])) {
 				ret = 0;
 				goto out;
@@ -2800,7 +2796,7 @@ static void update_image(struct apply_state *state,
 	 * remove the copy of preimage at offset in img
 	 * and replace it with postimage
 	 */
-	int i, nr;
+	int nr;
 	size_t remove_count, insert_count, applied_at = 0;
 	size_t result_alloc;
 	char *result;
@@ -2819,11 +2815,11 @@ static void update_image(struct apply_state *state,
 	if (preimage_limit > img->line_nr - applied_pos)
 		preimage_limit = img->line_nr - applied_pos;
 
-	for (i = 0; i < applied_pos; i++)
+	for (size_t i = 0; i < applied_pos; i++)
 		applied_at += img->line[i].len;
 
 	remove_count = 0;
-	for (i = 0; i < preimage_limit; i++)
+	for (size_t i = 0; i < preimage_limit; i++)
 		remove_count += img->line[applied_pos + i].len;
 	insert_count = postimage->buf.len;
 
@@ -2852,7 +2848,7 @@ static void update_image(struct apply_state *state,
 			   img->line_nr - (applied_pos + preimage_limit));
 	COPY_ARRAY(img->line + applied_pos, postimage->line, postimage->line_nr);
 	if (!state->allow_overlap)
-		for (i = 0; i < postimage->line_nr; i++)
+		for (size_t i = 0; i < postimage->line_nr; i++)
 			img->line[applied_pos + i].flag |= LINE_PATCHED;
 	img->line_nr = nr;
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [GSOC][PATCH v2 6/6] apply: enable -Wsign-comparison checks
  2025-02-09  8:12 ` [GSOC][PATCH v2 0/6] " Zejun Zhao
                     ` (4 preceding siblings ...)
  2025-02-09  8:12   ` [GSOC][PATCH v2 5/6] apply: use `size_t` loop counters Zejun Zhao
@ 2025-02-09  8:12   ` Zejun Zhao
  5 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-09  8:12 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps

Remove the `#define DISABLE_SIGN_COMPARE_WARNINGS` header line.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/apply.c b/apply.c
index 585f534732..51d0f4813f 100644
--- a/apply.c
+++ b/apply.c
@@ -8,7 +8,6 @@
  */
 
 #define USE_THE_REPOSITORY_VARIABLE
-#define DISABLE_SIGN_COMPARE_WARNINGS
 
 #include "git-compat-util.h"
 #include "abspath.h"
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH v2 2/6] apply: change some variables from `int` to `size_t`
  2025-02-09  8:12   ` [GSOC][PATCH v2 2/6] apply: change some variables from `int` to `size_t` Zejun Zhao
@ 2025-02-13  6:08     ` Patrick Steinhardt
  2025-02-16  7:12       ` [GSOC][PATCH] apply: address -Wsign-comparison warnings Zejun Zhao
  0 siblings, 1 reply; 31+ messages in thread
From: Patrick Steinhardt @ 2025-02-13  6:08 UTC (permalink / raw)
  To: Zejun Zhao; +Cc: git, gitster, newren

On Sun, Feb 09, 2025 at 08:12:12AM +0000, Zejun Zhao wrote:
> Some assigned variables are mistyped as `int`, including
> 
>   - those whose values come from a system function returning `size_t`,
> 
>   - those that are used for array indexing,
> 
>   - those that represent length/size/distance,
> 
> some of which will trigger -Wsign-comparison warnings.
> 
> Change some of them to `size_t`/`unsigned`.
> 
> Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
> ---
>  apply.c | 21 +++++++++++----------
>  1 file changed, 11 insertions(+), 10 deletions(-)
> 
> diff --git a/apply.c b/apply.c
> index 831b338155..b4ae74a5fb 100644
> --- a/apply.c
> +++ b/apply.c
> @@ -1087,7 +1087,7 @@ static int gitdiff_index(struct gitdiff_data *state,
>  	 * and optional space with octal mode.
>  	 */
>  	const char *ptr, *eol;
> -	int len;
> +	size_t len;
>  	const unsigned hexsz = the_hash_algo->hexsz;
>  
>  	ptr = strchr(line, '.');

This is storing the result of `ptr - line`, which will be a positive
integer. It's later passed to `memcpy()`, which expects a `size_t`.

> @@ -2320,7 +2320,8 @@ static void update_pre_post_images(struct image *preimage,
>  {
>  	struct image fixed_preimage = IMAGE_INIT;
>  	size_t insert_pos = 0;
> -	int i, ctx, reduced;
> +	int i, reduced;
> +	size_t ctx;
>  	const char *fixed;
>  
>  	/*

`ctx` is indexing an array and counts against `preimage->line_nr`, which
is a `size_t`, too.

> @@ -2492,7 +2493,7 @@ static int match_fragment(struct apply_state *state,
>  	struct strbuf fixed = STRBUF_INIT;
>  	char *fixed_buf;
>  	size_t fixed_len;
> -	int preimage_limit;
> +	size_t preimage_limit;
>  	int ret;
>  
>  	if (preimage->line_nr + current_lno <= img->line_nr) {

This one stores `struct image::line_nr`, which is a `size_t`.

> @@ -2706,7 +2707,7 @@ static int find_pos(struct apply_state *state,
>  {
>  	int i;
>  	unsigned long backwards, forwards, current;
> -	int backwards_lno, forwards_lno, current_lno;
> +	size_t backwards_lno, forwards_lno, current_lno;
>  
>  	/*
>  	 * When running with --allow-overlap, it is possible that a hunk is

These are a bit curious, as they store `line`, which is itself an `int`
parameter. As far as I understand, the only caller is also only ever
passing a positive integer here.

> @@ -2791,7 +2792,7 @@ static int find_pos(struct apply_state *state,
>   */
>  static void update_image(struct apply_state *state,
>  			 struct image *img,
> -			 int applied_pos,
> +			 size_t applied_pos,
>  			 struct image *preimage,
>  			 struct image *postimage)
>  {
> @@ -2803,7 +2804,7 @@ static void update_image(struct apply_state *state,
>  	size_t remove_count, insert_count, applied_at = 0;
>  	size_t result_alloc;
>  	char *result;
> -	int preimage_limit;
> +	size_t preimage_limit;
>  
>  	/*
>  	 * If we are removing blank lines at the end of img,

The caller makes sure that the function only gets called  when the
parameter is a positive integer.

> @@ -4288,19 +4289,19 @@ static void summary_patch_list(struct patch *patch)
>  
>  static void patch_stats(struct apply_state *state, struct patch *patch)
>  {
> -	int lines = patch->lines_added + patch->lines_deleted;
> +	unsigned lines = patch->lines_added + patch->lines_deleted;

This one is curious again, as the type of these variables is an `int`.
This should likely be adapted in tandem if they cannot be negative.

Patrick

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH v2 5/6] apply: use `size_t` loop counters
  2025-02-09  8:12   ` [GSOC][PATCH v2 5/6] apply: use `size_t` loop counters Zejun Zhao
@ 2025-02-13  6:08     ` Patrick Steinhardt
  0 siblings, 0 replies; 31+ messages in thread
From: Patrick Steinhardt @ 2025-02-13  6:08 UTC (permalink / raw)
  To: Zejun Zhao; +Cc: git, gitster, newren

On Sun, Feb 09, 2025 at 08:12:15AM +0000, Zejun Zhao wrote:
> diff --git a/apply.c b/apply.c
> index 72464fb6c2..585f534732 100644
> --- a/apply.c
> +++ b/apply.c
> @@ -2419,9 +2416,9 @@ static int line_by_line_fuzzy_match(struct image *img,
>  				    struct image *postimage,
>  				    unsigned long current,
>  				    int current_lno,
> -				    int preimage_limit)
> +				    size_t preimage_limit)
>  {
> -	int i;
> +	size_t i;
>  	size_t imgoff = 0;
>  	size_t preoff = 0;
>  	size_t extra_chars;

This one does more than just changing the loop counters. Probably makes
sense to split out into a separate commit.

Patrick

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH v2 1/6] apply: change fields in `apply_state` to unsigned
  2025-02-09  8:12   ` [GSOC][PATCH v2 1/6] apply: change fields in `apply_state` to unsigned Zejun Zhao
@ 2025-02-13  9:51     ` Karthik Nayak
  2025-02-13 18:39       ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Karthik Nayak @ 2025-02-13  9:51 UTC (permalink / raw)
  To: Zejun Zhao; +Cc: git, gitster, newren, ps

[-- Attachment #1: Type: text/plain, Size: 3029 bytes --]

Zejun Zhao <jelly.zhao.42@gmail.com> writes:

> `.max_change` and `.max_len` of `apply_state` are only used as unsigned
> integers. Misuse of `int` type would cause -Wsign-comparison warnings.
>
> Fix this by
>
>   - change `.max_change`'s type to `unsigned` since it's just a counter
>

Looking at `.max_change` it seems like this is only assigned in
`patch_stats()` where we do

  int lines = patch->lines_added + patch->lines_deleted;

  if (lines > state->max_change)
     state->max_change = lines;

In this case shouldn't we first convert `.lines_added` `.lines_deleted`
to also be 'unsigned int' in the first place?

>   - change `.max_len`'s type to `size_t` since it's a length
>

I see that this is assigned the return value of `strlen()` so this
makes sense.

>   - change the types of relevant variables in function `show_stats`
>
> Note that `printf`'s format string requires us to do some typecast
> (from `size_t` to `int`) on `max` in function `show_stats`. This is
> safe because we already set a upper bound of `50` for `max` before the
> cast.
>
> Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
> ---
>  apply.c | 9 +++++----
>  apply.h | 4 ++--
>  2 files changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/apply.c b/apply.c
> index 4a7b6120ac..831b338155 100644
> --- a/apply.c
> +++ b/apply.c
> @@ -2238,7 +2238,8 @@ static void show_stats(struct apply_state *state, struct patch *patch)
>  {
>  	struct strbuf qname = STRBUF_INIT;
>  	char *cp = patch->new_name ? patch->new_name : patch->old_name;
> -	int max, add, del;
> +	size_t max;
> +	unsigned add, del;
>

Tangential to this patch, I don't think we have a guideline on using
'unsigned' vs 'unsigned int'. Probably we should.

>  	quote_c_style(cp, &qname, NULL, 0);
>
> @@ -2257,12 +2258,12 @@ static void show_stats(struct apply_state *state, struct patch *patch)
>  	}
>
>  	if (patch->is_binary) {
> -		printf(" %-*s |  Bin\n", max, qname.buf);
> +		printf(" %-*s |  Bin\n", (int) max, qname.buf);
>  		strbuf_release(&qname);
>  		return;
>  	}
>
> -	printf(" %-*s |", max, qname.buf);
> +	printf(" %-*s |", (int) max, qname.buf);
>  	strbuf_release(&qname);
>
>  	/*
> @@ -2273,7 +2274,7 @@ static void show_stats(struct apply_state *state, struct patch *patch)
>  	del = patch->lines_deleted;
>
>  	if (state->max_change > 0) {
> -		int total = ((add + del) * max + state->max_change / 2) / state->max_change;
> +		unsigned total = ((add + del) * max + state->max_change / 2) / state->max_change;
>  		add = (add * max + state->max_change / 2) / state->max_change;
>  		del = total - add;
>  	}
> diff --git a/apply.h b/apply.h
> index 90e887ec0e..f7f369d44f 100644
> --- a/apply.h
> +++ b/apply.h
> @@ -90,8 +90,8 @@ struct apply_state {
>  	 * we've seen, and the longest filename. That allows us to do simple
>  	 * scaling.
>  	 */
> -	int max_change;
> -	int max_len;
> +	unsigned max_change;
> +	size_t max_len;
>
>  	/*
>  	 * Records filenames that have been touched, in order to handle
> --
> 2.43.0

The rest look good.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH v2 1/6] apply: change fields in `apply_state` to unsigned
  2025-02-13  9:51     ` Karthik Nayak
@ 2025-02-13 18:39       ` Junio C Hamano
  0 siblings, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2025-02-13 18:39 UTC (permalink / raw)
  To: Karthik Nayak; +Cc: Zejun Zhao, git, newren, ps

Karthik Nayak <karthik.188@gmail.com> writes:

> Zejun Zhao <jelly.zhao.42@gmail.com> writes:
>
>> `.max_change` and `.max_len` of `apply_state` are only used as unsigned
>> integers. Misuse of `int` type would cause -Wsign-comparison warnings.
>>
>> Fix this by
>>
>>   - change `.max_change`'s type to `unsigned` since it's just a counter
>>
>
> Looking at `.max_change` it seems like this is only assigned in
> `patch_stats()` where we do
>
>   int lines = patch->lines_added + patch->lines_deleted;
>
>   if (lines > state->max_change)
>      state->max_change = lines;
>
> In this case shouldn't we first convert `.lines_added` `.lines_deleted`
> to also be 'unsigned int' in the first place?

Surely.  Or if any of the internal API uses a calling convention
that yields number of lines on success and negative number to signal
errors, we could also unify them to signed integer instead.  In
either case, using types consistently is good, and thanks for sharp
eyes spotting this instance.

>> @@ -2257,12 +2258,12 @@ static void show_stats(struct apply_state *state, struct patch *patch)
>>  	}
>>
>>  	if (patch->is_binary) {
>> -		printf(" %-*s |  Bin\n", max, qname.buf);
>> +		printf(" %-*s |  Bin\n", (int) max, qname.buf);
>>  		strbuf_release(&qname);
>>  		return;
>>  	}
>>
>> -	printf(" %-*s |", max, qname.buf);
>> +	printf(" %-*s |", (int) max, qname.buf);
>>  	strbuf_release(&qname);
>>

This is the kind of fallout that makes the resulting code harder to
read.  How bad would the code churn be if we instead unify to the
signed integer type, instead of using size_t, and making sure we
use the range-checking versions of arithmetic when needed, I have to
wonder?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-13  6:08     ` Patrick Steinhardt
@ 2025-02-16  7:12       ` Zejun Zhao
  2025-02-18 17:49         ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Zejun Zhao @ 2025-02-16  7:12 UTC (permalink / raw)
  To: ps; +Cc: git, gitster, jelly.zhao.42, newren

On Thu, Feb 13 2025 07:08:13 +0100, Patrick Steinhardt wrote,
> > @@ -2706,7 +2707,7 @@ static int find_pos(struct apply_state *state,
> >  {
> >  	int i;
> >  	unsigned long backwards, forwards, current;
> > -	int backwards_lno, forwards_lno, current_lno;
> > +	size_t backwards_lno, forwards_lno, current_lno;
> >  
> >  	/*
> >  	 * When running with --allow-overlap, it is possible that a hunk is
> 
> These are a bit curious, as they store `line`, which is itself an `int`
> parameter. As far as I understand, the only caller is also only ever
> passing a positive integer here.

They do store an `int` parameter, which is `line`. However, we cannot change 
`line` to unsigned since it can temporarily store an negative value before 
the assignments to these `*_lno` happen. Below are from function `find_pos` 
(without any change).

  /*
   * If match_beginning or match_end is specified, there is no
   * point starting from a wrong line that will never match and
   * wander around and wait for a match at the specified end.
   */
  if (match_beginning)
	  line = 0;
  else if (match_end)
	  line = img->line_nr - preimage->line_nr;

  /*
   * Because the comparison is unsigned, the following test
   * will also take care of a negative line number that can
   * result when match_end and preimage is larger than the target.
   */
  if (line > img->line_nr)
	  line = img->line_nr;

> > @@ -4288,19 +4289,19 @@ static void summary_patch_list(struct patch *patch)
> >  
> >  static void patch_stats(struct apply_state *state, struct patch *patch)
> >  {
> > -	int lines = patch->lines_added + patch->lines_deleted;
> > +	unsigned lines = patch->lines_added + patch->lines_deleted;
> 
> This one is curious again, as the type of these variables is an `int`.
> This should likely be adapted in tandem if they cannot be negative.

Thank you for identifying. Will be in next version.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings
  2025-02-05  1:40 [GSOC][PATCH] apply: address -Wsign-comparison warnings Zejun Zhao
                   ` (2 preceding siblings ...)
  2025-02-09  8:12 ` [GSOC][PATCH v2 0/6] " Zejun Zhao
@ 2025-02-16  7:28 ` Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 1/6] apply: correct type misuse in `apply.h` Zejun Zhao
                     ` (5 more replies)
  3 siblings, 6 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-16  7:28 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps, karthik.188

This is the v3 of this patch set.

Changes from v2:

  - change `patch::lines_added/deleted` to unsigned

  - use `unsigned int` instead of `unsigned`

  - separate irrelevant changes from the "loop counter" commit

Below are the cover letter from the v2.

There are several -Wsign-comparison warnings in "apply.c", which can be
classified into the following three types:

  1. comparing a length of `size_t` type with a `ptrdiff_t` value

  2. comparing a length of `size_t` type with a length of `int` type

  3. comparing a loop counter `i` of `int` type with an unsigned loop
  bound

Fix these warnings following one basic principle: do not touch the
relevant logics and keep the behaviors of the code. Adopt three
different strategies for each of the above three types:

  1. cast the `ptrdiff_t` values to `size_t` type

  2. try to change the type of the `int` length to `size_t` (may fall 
  back to Strategy 1 if the variable is not guaranteed to be unsigned)

  3. use a loop counter `i` of `size_t` type

Zejun Zhao (6):
  apply: correct type misuse in `apply.h`
  apply: change some variables from `int` to `size_t`
  apply: do a typecast to eliminate warnings
  apply: cast some ptrdiff_t's to size_t's
  apply: use `size_t` loop counters
  apply: enable -Wsign-comparison checks

 apply.c | 69 +++++++++++++++++++++++++++------------------------------
 apply.h |  6 ++---
 2 files changed, 36 insertions(+), 39 deletions(-)

Range-diff against v2:
1:  16ee2f93a3 ! 1:  a5985e0e16 apply: change fields in `apply_state` to unsigned
    @@ Metadata
     Author: Zejun Zhao <jelly.zhao.42@gmail.com>
     
      ## Commit message ##
    -    apply: change fields in `apply_state` to unsigned
    +    apply: correct type misuse in `apply.h`
     
    -    `.max_change` and `.max_len` of `apply_state` are only used as unsigned
    -    integers. Misuse of `int` type would cause -Wsign-comparison warnings.
    +    There are a few `int` fields of structs in `apply.h` that are only used
    +    as unsigned integer and are only assigned unsigned integer value. Some
    +    of these misuse of `int` type would cause -Wsign-comparison warnings.
     
         Fix this by
     
    -      - change `.max_change`'s type to `unsigned` since it's just a counter
    +      - change `apply_state::max_change`'s type to `unsigned int` since it's
    +        just a counter of lines
     
    -      - change `.max_len`'s type to `size_t` since it's a length
    +      - change `apply_state::max_len`'s type to `size_t` since it's storing
    +        a length
     
    -      - change the types of relevant variables in function `show_stats`
    +      - change `patch::lines_added/deleted`'s types to `unsigned int` since
    +        they are just counters of lines
    +
    +      - change the types of relevant variables in function `show_stats` and
    +        `patch_stats`
     
         Note that `printf`'s format string requires us to do some typecast
         (from `size_t` to `int`) on `max` in function `show_stats`. This is
    @@ apply.c: static void show_stats(struct apply_state *state, struct patch *patch)
      	char *cp = patch->new_name ? patch->new_name : patch->old_name;
     -	int max, add, del;
     +	size_t max;
    -+	unsigned add, del;
    ++	unsigned int add, del;
      
      	quote_c_style(cp, &qname, NULL, 0);
      
    @@ apply.c: static void show_stats(struct apply_state *state, struct patch *patch)
      
      	if (state->max_change > 0) {
     -		int total = ((add + del) * max + state->max_change / 2) / state->max_change;
    -+		unsigned total = ((add + del) * max + state->max_change / 2) / state->max_change;
    ++		unsigned int total = ((add + del) * max + state->max_change / 2) / state->max_change;
      		add = (add * max + state->max_change / 2) / state->max_change;
      		del = total - add;
      	}
    +@@ apply.c: static void summary_patch_list(struct patch *patch)
    + 
    + static void patch_stats(struct apply_state *state, struct patch *patch)
    + {
    +-	int lines = patch->lines_added + patch->lines_deleted;
    ++	unsigned int lines = patch->lines_added + patch->lines_deleted;
    + 
    + 	if (lines > state->max_change)
    + 		state->max_change = lines;
     
      ## apply.h ##
     @@ apply.h: struct apply_state {
    @@ apply.h: struct apply_state {
      	 */
     -	int max_change;
     -	int max_len;
    -+	unsigned max_change;
    ++	unsigned int max_change;
     +	size_t max_len;
      
      	/*
      	 * Records filenames that have been touched, in order to handle
    +@@ apply.h: struct patch {
    + 	int is_new, is_delete;	/* -1 = unknown, 0 = false, 1 = true */
    + 	int rejected;
    + 	unsigned ws_rule;
    +-	int lines_added, lines_deleted;
    ++	unsigned int lines_added, lines_deleted;
    + 	int score;
    + 	int extension_linenr; /* first line specifying delete/new/rename/copy */
    + 	unsigned int is_toplevel_relative:1;
2:  962ab421cf ! 2:  b73f3e9fae apply: change some variables from `int` to `size_t`
    @@ Commit message
     
         some of which will trigger -Wsign-comparison warnings.
     
    -    Change some of them to `size_t`/`unsigned`.
    +    Change some of them to `size_t`/`unsigned int`.
     
         Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
     
    @@ apply.c: static void update_pre_post_images(struct image *preimage,
      	const char *fixed;
      
      	/*
    +@@ apply.c: static int line_by_line_fuzzy_match(struct image *img,
    + 				    struct image *postimage,
    + 				    unsigned long current,
    + 				    int current_lno,
    +-				    int preimage_limit)
    ++				    size_t preimage_limit)
    + {
    + 	int i;
    + 	size_t imgoff = 0;
     @@ apply.c: static int match_fragment(struct apply_state *state,
      	struct strbuf fixed = STRBUF_INIT;
      	char *fixed_buf;
    @@ apply.c: static void update_image(struct apply_state *state,
      
      	/*
      	 * If we are removing blank lines at the end of img,
    -@@ apply.c: static void summary_patch_list(struct patch *patch)
    - 
    - static void patch_stats(struct apply_state *state, struct patch *patch)
    - {
    --	int lines = patch->lines_added + patch->lines_deleted;
    -+	unsigned lines = patch->lines_added + patch->lines_deleted;
    - 
    +@@ apply.c: static void patch_stats(struct apply_state *state, struct patch *patch)
      	if (lines > state->max_change)
      		state->max_change = lines;
      	if (patch->old_name) {
3:  09f42cbd5a = 3:  85938a18b1 apply: do a typecast to eliminate warnings
4:  1142213871 = 4:  ba31e9406d apply: cast some ptrdiff_t's to size_t's
5:  8e27fb9c08 ! 5:  351e561843 apply: use `size_t` loop counters
    @@ apply.c: static void update_pre_post_images(struct image *preimage,
      
      		if (!(postimage->line[i].flag & LINE_COMMON)) {
     @@ apply.c: static int line_by_line_fuzzy_match(struct image *img,
    - 				    struct image *postimage,
    - 				    unsigned long current,
      				    int current_lno,
    --				    int preimage_limit)
    -+				    size_t preimage_limit)
    + 				    size_t preimage_limit)
      {
     -	int i;
     +	size_t i;
6:  60155b1aeb = 6:  cdb3e806a1 apply: enable -Wsign-comparison checks
-- 
2.43.0


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3 1/6] apply: correct type misuse in `apply.h`
  2025-02-16  7:28 ` [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings Zejun Zhao
@ 2025-02-16  7:28   ` Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 2/6] apply: change some variables from `int` to `size_t` Zejun Zhao
                     ` (4 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-16  7:28 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps, karthik.188

There are a few `int` fields of structs in `apply.h` that are only used
as unsigned integer and are only assigned unsigned integer value. Some
of these misuse of `int` type would cause -Wsign-comparison warnings.

Fix this by

  - change `apply_state::max_change`'s type to `unsigned int` since it's
    just a counter of lines

  - change `apply_state::max_len`'s type to `size_t` since it's storing
    a length

  - change `patch::lines_added/deleted`'s types to `unsigned int` since
    they are just counters of lines

  - change the types of relevant variables in function `show_stats` and
    `patch_stats`

Note that `printf`'s format string requires us to do some typecast
(from `size_t` to `int`) on `max` in function `show_stats`. This is
safe because we already set a upper bound of `50` for `max` before the
cast.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 11 ++++++-----
 apply.h |  6 +++---
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/apply.c b/apply.c
index 4a7b6120ac..4380a83386 100644
--- a/apply.c
+++ b/apply.c
@@ -2238,7 +2238,8 @@ static void show_stats(struct apply_state *state, struct patch *patch)
 {
 	struct strbuf qname = STRBUF_INIT;
 	char *cp = patch->new_name ? patch->new_name : patch->old_name;
-	int max, add, del;
+	size_t max;
+	unsigned int add, del;
 
 	quote_c_style(cp, &qname, NULL, 0);
 
@@ -2257,12 +2258,12 @@ static void show_stats(struct apply_state *state, struct patch *patch)
 	}
 
 	if (patch->is_binary) {
-		printf(" %-*s |  Bin\n", max, qname.buf);
+		printf(" %-*s |  Bin\n", (int) max, qname.buf);
 		strbuf_release(&qname);
 		return;
 	}
 
-	printf(" %-*s |", max, qname.buf);
+	printf(" %-*s |", (int) max, qname.buf);
 	strbuf_release(&qname);
 
 	/*
@@ -2273,7 +2274,7 @@ static void show_stats(struct apply_state *state, struct patch *patch)
 	del = patch->lines_deleted;
 
 	if (state->max_change > 0) {
-		int total = ((add + del) * max + state->max_change / 2) / state->max_change;
+		unsigned int total = ((add + del) * max + state->max_change / 2) / state->max_change;
 		add = (add * max + state->max_change / 2) / state->max_change;
 		del = total - add;
 	}
@@ -4287,7 +4288,7 @@ static void summary_patch_list(struct patch *patch)
 
 static void patch_stats(struct apply_state *state, struct patch *patch)
 {
-	int lines = patch->lines_added + patch->lines_deleted;
+	unsigned int lines = patch->lines_added + patch->lines_deleted;
 
 	if (lines > state->max_change)
 		state->max_change = lines;
diff --git a/apply.h b/apply.h
index 90e887ec0e..8cd22756e2 100644
--- a/apply.h
+++ b/apply.h
@@ -90,8 +90,8 @@ struct apply_state {
 	 * we've seen, and the longest filename. That allows us to do simple
 	 * scaling.
 	 */
-	int max_change;
-	int max_len;
+	unsigned int max_change;
+	size_t max_len;
 
 	/*
 	 * Records filenames that have been touched, in order to handle
@@ -127,7 +127,7 @@ struct patch {
 	int is_new, is_delete;	/* -1 = unknown, 0 = false, 1 = true */
 	int rejected;
 	unsigned ws_rule;
-	int lines_added, lines_deleted;
+	unsigned int lines_added, lines_deleted;
 	int score;
 	int extension_linenr; /* first line specifying delete/new/rename/copy */
 	unsigned int is_toplevel_relative:1;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3 2/6] apply: change some variables from `int` to `size_t`
  2025-02-16  7:28 ` [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 1/6] apply: correct type misuse in `apply.h` Zejun Zhao
@ 2025-02-16  7:28   ` Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 3/6] apply: do a typecast to eliminate warnings Zejun Zhao
                     ` (3 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-16  7:28 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps, karthik.188

Some assigned variables are mistyped as `int`, including

  - those whose values come from a system function returning `size_t`,

  - those that are used for array indexing,

  - those that represent length/size/distance,

some of which will trigger -Wsign-comparison warnings.

Change some of them to `size_t`/`unsigned int`.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/apply.c b/apply.c
index 4380a83386..4aa47a22b9 100644
--- a/apply.c
+++ b/apply.c
@@ -1087,7 +1087,7 @@ static int gitdiff_index(struct gitdiff_data *state,
 	 * and optional space with octal mode.
 	 */
 	const char *ptr, *eol;
-	int len;
+	size_t len;
 	const unsigned hexsz = the_hash_algo->hexsz;
 
 	ptr = strchr(line, '.');
@@ -2185,7 +2185,7 @@ static int parse_chunk(struct apply_state *state, char *buffer, unsigned long si
 			};
 			int i;
 			for (i = 0; binhdr[i]; i++) {
-				int len = strlen(binhdr[i]);
+				size_t len = strlen(binhdr[i]);
 				if (len < size - hd &&
 				    !memcmp(binhdr[i], buffer + hd, len)) {
 					state->linenr++;
@@ -2320,7 +2320,8 @@ static void update_pre_post_images(struct image *preimage,
 {
 	struct image fixed_preimage = IMAGE_INIT;
 	size_t insert_pos = 0;
-	int i, ctx, reduced;
+	int i, reduced;
+	size_t ctx;
 	const char *fixed;
 
 	/*
@@ -2418,7 +2419,7 @@ static int line_by_line_fuzzy_match(struct image *img,
 				    struct image *postimage,
 				    unsigned long current,
 				    int current_lno,
-				    int preimage_limit)
+				    size_t preimage_limit)
 {
 	int i;
 	size_t imgoff = 0;
@@ -2492,7 +2493,7 @@ static int match_fragment(struct apply_state *state,
 	struct strbuf fixed = STRBUF_INIT;
 	char *fixed_buf;
 	size_t fixed_len;
-	int preimage_limit;
+	size_t preimage_limit;
 	int ret;
 
 	if (preimage->line_nr + current_lno <= img->line_nr) {
@@ -2706,7 +2707,7 @@ static int find_pos(struct apply_state *state,
 {
 	int i;
 	unsigned long backwards, forwards, current;
-	int backwards_lno, forwards_lno, current_lno;
+	size_t backwards_lno, forwards_lno, current_lno;
 
 	/*
 	 * When running with --allow-overlap, it is possible that a hunk is
@@ -2791,7 +2792,7 @@ static int find_pos(struct apply_state *state,
  */
 static void update_image(struct apply_state *state,
 			 struct image *img,
-			 int applied_pos,
+			 size_t applied_pos,
 			 struct image *preimage,
 			 struct image *postimage)
 {
@@ -2803,7 +2804,7 @@ static void update_image(struct apply_state *state,
 	size_t remove_count, insert_count, applied_at = 0;
 	size_t result_alloc;
 	char *result;
-	int preimage_limit;
+	size_t preimage_limit;
 
 	/*
 	 * If we are removing blank lines at the end of img,
@@ -4293,14 +4294,14 @@ static void patch_stats(struct apply_state *state, struct patch *patch)
 	if (lines > state->max_change)
 		state->max_change = lines;
 	if (patch->old_name) {
-		int len = quote_c_style(patch->old_name, NULL, NULL, 0);
+		size_t len = quote_c_style(patch->old_name, NULL, NULL, 0);
 		if (!len)
 			len = strlen(patch->old_name);
 		if (len > state->max_len)
 			state->max_len = len;
 	}
 	if (patch->new_name) {
-		int len = quote_c_style(patch->new_name, NULL, NULL, 0);
+		size_t len = quote_c_style(patch->new_name, NULL, NULL, 0);
 		if (!len)
 			len = strlen(patch->new_name);
 		if (len > state->max_len)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3 3/6] apply: do a typecast to eliminate warnings
  2025-02-16  7:28 ` [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 1/6] apply: correct type misuse in `apply.h` Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 2/6] apply: change some variables from `int` to `size_t` Zejun Zhao
@ 2025-02-16  7:28   ` Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 4/6] apply: cast some ptrdiff_t's to size_t's Zejun Zhao
                     ` (2 subsequent siblings)
  5 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-16  7:28 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps, karthik.188

`git_hdr_len` is an `int` variable that can be negative and is used to
compare against a `len` of `size_t`, which will trigger
-Wsign-comparison warnings

Cast `git_hdr_len` to `size_t` after an above-zero check.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apply.c b/apply.c
index 4aa47a22b9..ac3e599bdf 100644
--- a/apply.c
+++ b/apply.c
@@ -1592,7 +1592,7 @@ static int find_header(struct apply_state *state,
 								size, patch);
 			if (git_hdr_len < 0)
 				return -128;
-			if (git_hdr_len <= len)
+			if ((size_t) git_hdr_len <= len)
 				continue;
 			*hdrsize = git_hdr_len;
 			return offset;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3 4/6] apply: cast some ptrdiff_t's to size_t's
  2025-02-16  7:28 ` [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings Zejun Zhao
                     ` (2 preceding siblings ...)
  2025-02-16  7:28   ` [PATCH v3 3/6] apply: do a typecast to eliminate warnings Zejun Zhao
@ 2025-02-16  7:28   ` Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 5/6] apply: use `size_t` loop counters Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 6/6] apply: enable -Wsign-comparison checks Zejun Zhao
  5 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-16  7:28 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps, karthik.188

There are several -Wsign-comparison warnings in "apply.c", complaining
about us comparing ptrdiff_t's with size_t's.

Fix these warnings by typecasting from ptrdiff_t to size_t. As to why
the casts is safe,

  - in function `date_len`, `date` is the starting address of a date at
  the end of the `line` and is guaranteed to be larger than (or equal
  to) `line`

  - in function `git_header_name`, `cp` is guaranteed to be larger than
  (or equal to) `second`, so `line + len` is greater than (or equal to)
  `cp` since we already treat `line + len - second` as a size_t

  - in function `git_header_name`, we are iterating `name` using
  `second`, so `second` is guaranteed to be greater than (or equal to)
  `name`

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/apply.c b/apply.c
index ac3e599bdf..c554a52f28 100644
--- a/apply.c
+++ b/apply.c
@@ -540,7 +540,7 @@ static size_t date_len(const char *line, size_t len)
 	    !isdigit(*p++) || !isdigit(*p++))	/* Not a date. */
 		return 0;
 
-	if (date - line >= strlen("19") &&
+	if ((size_t) (date - line) >= strlen("19") &&
 	    isdigit(date[-1]) && isdigit(date[-2]))	/* 4-digit year */
 		date -= strlen("19");
 
@@ -1207,7 +1207,7 @@ static char *git_header_name(int p_value,
 		cp = skip_tree_prefix(p_value, second, line + llen - second);
 		if (!cp)
 			goto free_and_fail1;
-		if (line + llen - cp != first.len ||
+		if ((size_t) (line + llen - cp) != first.len ||
 		    memcmp(first.buf, cp, first.len))
 			goto free_and_fail1;
 		return strbuf_detach(&first, NULL);
@@ -1240,7 +1240,7 @@ static char *git_header_name(int p_value,
 				goto free_and_fail2;
 
 			len = sp.buf + sp.len - np;
-			if (len < second - name &&
+			if (len < (size_t) (second - name) &&
 			    !strncmp(np, name, len) &&
 			    isspace(name[len])) {
 				/* Good */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3 5/6] apply: use `size_t` loop counters
  2025-02-16  7:28 ` [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings Zejun Zhao
                     ` (3 preceding siblings ...)
  2025-02-16  7:28   ` [PATCH v3 4/6] apply: cast some ptrdiff_t's to size_t's Zejun Zhao
@ 2025-02-16  7:28   ` Zejun Zhao
  2025-02-16  7:28   ` [PATCH v3 6/6] apply: enable -Wsign-comparison checks Zejun Zhao
  5 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-16  7:28 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps, karthik.188

Some `int` loop counters trigger -Wsign-comparison warnings.

Use `size_t` loop counters.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/apply.c b/apply.c
index c554a52f28..4c26f608ee 100644
--- a/apply.c
+++ b/apply.c
@@ -1371,12 +1371,11 @@ int parse_git_diff_header(struct strbuf *root,
 			{ "index ", gitdiff_index },
 			{ "", gitdiff_unrecognized },
 		};
-		int i;
 
 		len = linelen(line, size);
 		if (!len || line[len-1] != '\n')
 			break;
-		for (i = 0; i < ARRAY_SIZE(optable); i++) {
+		for (size_t i = 0; i < ARRAY_SIZE(optable); i++) {
 			const struct opentry *p = optable + i;
 			int oplen = strlen(p->str);
 			int res;
@@ -2097,7 +2096,6 @@ static void add_name_limit(struct apply_state *state,
 static int use_patch(struct apply_state *state, struct patch *p)
 {
 	const char *pathname = p->new_name ? p->new_name : p->old_name;
-	int i;
 
 	/* Paths outside are not touched regardless of "--include" */
 	if (state->prefix && *state->prefix) {
@@ -2107,7 +2105,7 @@ static int use_patch(struct apply_state *state, struct patch *p)
 	}
 
 	/* See if it matches any of exclude/include rule */
-	for (i = 0; i < state->limit_by_name.nr; i++) {
+	for (size_t i = 0; i < state->limit_by_name.nr; i++) {
 		struct string_list_item *it = &state->limit_by_name.items[i];
 		if (!wildmatch(it->string, pathname, 0))
 			return (it->util != NULL);
@@ -2183,8 +2181,7 @@ static int parse_chunk(struct apply_state *state, char *buffer, unsigned long si
 				"Files ",
 				NULL,
 			};
-			int i;
-			for (i = 0; binhdr[i]; i++) {
+			for (size_t i = 0; binhdr[i]; i++) {
 				size_t len = strlen(binhdr[i]);
 				if (len < size - hd &&
 				    !memcmp(binhdr[i], buffer + hd, len)) {
@@ -2320,7 +2317,7 @@ static void update_pre_post_images(struct image *preimage,
 {
 	struct image fixed_preimage = IMAGE_INIT;
 	size_t insert_pos = 0;
-	int i, reduced;
+	int reduced;
 	size_t ctx;
 	const char *fixed;
 
@@ -2330,7 +2327,7 @@ static void update_pre_post_images(struct image *preimage,
 	 * free "oldlines".
 	 */
 	image_prepare(&fixed_preimage, buf, len, 1);
-	for (i = 0; i < fixed_preimage.line_nr; i++)
+	for (size_t i = 0; i < fixed_preimage.line_nr; i++)
 		fixed_preimage.line[i].flag = preimage->line[i].flag;
 	image_clear(preimage);
 	*preimage = fixed_preimage;
@@ -2339,7 +2336,7 @@ static void update_pre_post_images(struct image *preimage,
 	/*
 	 * Adjust the common context lines in postimage.
 	 */
-	for (i = reduced = ctx = 0; i < postimage->line_nr; i++) {
+	for (size_t i = reduced = ctx = 0; i < postimage->line_nr; i++) {
 		size_t l_len = postimage->line[i].len;
 
 		if (!(postimage->line[i].flag & LINE_COMMON)) {
@@ -2421,7 +2418,7 @@ static int line_by_line_fuzzy_match(struct image *img,
 				    int current_lno,
 				    size_t preimage_limit)
 {
-	int i;
+	size_t i;
 	size_t imgoff = 0;
 	size_t preoff = 0;
 	size_t extra_chars;
@@ -2488,7 +2485,7 @@ static int match_fragment(struct apply_state *state,
 			  unsigned ws_rule,
 			  int match_beginning, int match_end)
 {
-	int i;
+	size_t i;
 	const char *orig, *target;
 	struct strbuf fixed = STRBUF_INIT;
 	char *fixed_buf;
@@ -2665,12 +2662,11 @@ static int match_fragment(struct apply_state *state,
 	for ( ; i < preimage->line_nr; i++) {
 		size_t fixstart = fixed.len; /* start of the fixed preimage */
 		size_t oldlen = preimage->line[i].len;
-		int j;
 
 		/* Try fixing the line in the preimage */
 		ws_fix_copy(&fixed, orig, oldlen, ws_rule, NULL);
 
-		for (j = fixstart; j < fixed.len; j++) {
+		for (size_t j = fixstart; j < fixed.len; j++) {
 			if (!isspace(fixed.buf[j])) {
 				ret = 0;
 				goto out;
@@ -2800,7 +2796,7 @@ static void update_image(struct apply_state *state,
 	 * remove the copy of preimage at offset in img
 	 * and replace it with postimage
 	 */
-	int i, nr;
+	int nr;
 	size_t remove_count, insert_count, applied_at = 0;
 	size_t result_alloc;
 	char *result;
@@ -2819,11 +2815,11 @@ static void update_image(struct apply_state *state,
 	if (preimage_limit > img->line_nr - applied_pos)
 		preimage_limit = img->line_nr - applied_pos;
 
-	for (i = 0; i < applied_pos; i++)
+	for (size_t i = 0; i < applied_pos; i++)
 		applied_at += img->line[i].len;
 
 	remove_count = 0;
-	for (i = 0; i < preimage_limit; i++)
+	for (size_t i = 0; i < preimage_limit; i++)
 		remove_count += img->line[applied_pos + i].len;
 	insert_count = postimage->buf.len;
 
@@ -2852,7 +2848,7 @@ static void update_image(struct apply_state *state,
 			   img->line_nr - (applied_pos + preimage_limit));
 	COPY_ARRAY(img->line + applied_pos, postimage->line, postimage->line_nr);
 	if (!state->allow_overlap)
-		for (i = 0; i < postimage->line_nr; i++)
+		for (size_t i = 0; i < postimage->line_nr; i++)
 			img->line[applied_pos + i].flag |= LINE_PATCHED;
 	img->line_nr = nr;
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH v3 6/6] apply: enable -Wsign-comparison checks
  2025-02-16  7:28 ` [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings Zejun Zhao
                     ` (4 preceding siblings ...)
  2025-02-16  7:28   ` [PATCH v3 5/6] apply: use `size_t` loop counters Zejun Zhao
@ 2025-02-16  7:28   ` Zejun Zhao
  5 siblings, 0 replies; 31+ messages in thread
From: Zejun Zhao @ 2025-02-16  7:28 UTC (permalink / raw)
  To: jelly.zhao.42; +Cc: git, gitster, newren, ps, karthik.188

Remove the `#define DISABLE_SIGN_COMPARE_WARNINGS` header line.

Signed-off-by: Zejun Zhao <jelly.zhao.42@gmail.com>
---
 apply.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/apply.c b/apply.c
index 4c26f608ee..f1113d830d 100644
--- a/apply.c
+++ b/apply.c
@@ -8,7 +8,6 @@
  */
 
 #define USE_THE_REPOSITORY_VARIABLE
-#define DISABLE_SIGN_COMPARE_WARNINGS
 
 #include "git-compat-util.h"
 #include "abspath.h"
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-16  7:12       ` [GSOC][PATCH] apply: address -Wsign-comparison warnings Zejun Zhao
@ 2025-02-18 17:49         ` Junio C Hamano
  2025-02-21  6:37           ` Zejun Zhao
  0 siblings, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2025-02-18 17:49 UTC (permalink / raw)
  To: Zejun Zhao; +Cc: ps, git, newren

Zejun Zhao <jelly.zhao.42@gmail.com> writes:

> On Thu, Feb 13 2025 07:08:13 +0100, Patrick Steinhardt wrote,
>> > @@ -2706,7 +2707,7 @@ static int find_pos(struct apply_state *state,
>> >  {
>> >  	int i;
>> >  	unsigned long backwards, forwards, current;
>> > -	int backwards_lno, forwards_lno, current_lno;
>> > +	size_t backwards_lno, forwards_lno, current_lno;
>> >  
>> >  	/*
>> >  	 * When running with --allow-overlap, it is possible that a hunk is
>> 
>> These are a bit curious, as they store `line`, which is itself an `int`
>> parameter. As far as I understand, the only caller is also only ever
>> passing a positive integer here.
>
> They do store an `int` parameter, which is `line`. However, we cannot change 
> `line` to unsigned since it can temporarily store an negative value before 
> the assignments to these `*_lno` happen.

Correct.  Doesn't it mean that a change that makes these line-number
variables to size_t is wrong?  Of course the change is not made to
break the code but may be to please some code paths in other parts
of the system that wants these line-number variables that are
currently "int" to compare or assign without range-checking with
"size_t" quantity or variable, but then shouldn't we fix these
places, not the types of these line-number variables that use the
platform natural integers?

THanks.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-18 17:49         ` Junio C Hamano
@ 2025-02-21  6:37           ` Zejun Zhao
  2025-02-21 17:11             ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Zejun Zhao @ 2025-02-21  6:37 UTC (permalink / raw)
  To: gitster; +Cc: git, jelly.zhao.42, newren, ps, karthik.188

On Tue, Feb 18 2025 09:49:46 -0800, Junio C Hamano wrote,
> Doesn't it mean that a change that makes these line-number
> variables to size_t is wrong?  Of course the change is not made to
> break the code but may be to please some code paths in other parts
> of the system that wants these line-number variables that are
> currently "int" to compare or assign without range-checking with
> "size_t" quantity or variable,

Sorry but I don't think the change here is wrong. These line-number variables 
are used as unsigned integers (compared with unsigned, used to index arrays, 
...). And what's more important, we even do range-checking on `line` before we 
assign it to those line-number's, which means we want to ensure the 
line-number's always store an unsigned integer. So why not declare these 
line-number's as unsigned if we initialize them as unsigned and at the same 
time use them as unsigned?

Regards.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-21  6:37           ` Zejun Zhao
@ 2025-02-21 17:11             ` Junio C Hamano
  2025-02-23 17:36               ` Zejun Zhao
  0 siblings, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2025-02-21 17:11 UTC (permalink / raw)
  To: Zejun Zhao; +Cc: git, newren, ps, karthik.188

Zejun Zhao <jelly.zhao.42@gmail.com> writes:

> On Tue, Feb 18 2025 09:49:46 -0800, Junio C Hamano wrote,
>> Doesn't it mean that a change that makes these line-number
>> variables to size_t is wrong?  Of course the change is not made to
>> break the code but may be to please some code paths in other parts
>> of the system that wants these line-number variables that are
>> currently "int" to compare or assign without range-checking with
>> "size_t" quantity or variable,
>
> Sorry but I don't think the change here is wrong. These line-number variables 
> are used as unsigned integers (compared with unsigned, used to index arrays, 
> ...).

I thought that we made it "int" not "unsigned" as functions that
compute these line numbers needed to signal an error "oh the input
is not a valid line number" and one of the most natural ways in C to
do so was to return a negative values from them.  Isn't that what is
happening?

For that kind of use, the storage (i.e. variables and structure
members) does not have to be signed as long as we check the error
returns from these functions (and I think we do), but then compilers
may give you needress warnings when you do assign such a value
returned from the function to the final storage after checking the
validity of the value.  You may use casts but that is trading clean
code for clean compilation without complaint from needless warnings.

Or you can make the storage also signed and we do not have to worry
about squelching the warnings with ugly casts.  When we know we are
not handling patches with a billion of lines, losing half the range
by going signed is rather a no-brainer choice between the two.

I do not see any good justification, even if we buy "the final
storage should be unsigned, since we always check before assignment"
(which I am not too opposed to), for using size_t instead of
platform natural integer (be it signed or unsigned) for the line
numbers, though.

Thanks.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-21 17:11             ` Junio C Hamano
@ 2025-02-23 17:36               ` Zejun Zhao
  2025-02-24 14:27                 ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Zejun Zhao @ 2025-02-23 17:36 UTC (permalink / raw)
  To: gitster; +Cc: git, jelly.zhao.42, karthik.188, newren, ps

Thank you for clarifying. Now I can see the concern on whether the whole bunch 
of things worth it; it's a trade-off between cleanness and robustness of the 
codebase and it's very common due to some coding conventions (e.g. error 
handling) in C. Maybe in this case we should just leave the working code work.

Now I'd like to ask for your advice on this issue, which I may be supposed to 
do before sending any actual patches, should I push forward this patchset 
further or pick another microproject?

Thanks.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-23 17:36               ` Zejun Zhao
@ 2025-02-24 14:27                 ` Junio C Hamano
  2025-02-25  3:24                   ` Zejun Zhao
  0 siblings, 1 reply; 31+ messages in thread
From: Junio C Hamano @ 2025-02-24 14:27 UTC (permalink / raw)
  To: Zejun Zhao; +Cc: git, karthik.188, newren, ps

Zejun Zhao <jelly.zhao.42@gmail.com> writes:

> Now I'd like to ask for your advice on this issue, which I may be supposed to 
> do before sending any actual patches, should I push forward this patchset 
> further or pick another microproject?

It really depends on what "this patchset" you mean.

I do think that there are still places in our code base where we
truncate size_t values that we eventually use for allocation by
mistakenly mix arithmetic with smaller type, and I do think it is
worth finding and fixing them.  So a patch that fixes a code path
with such an issue would still be a nice thing to do.  If there such
a change (I do not offhand recall) in the 6 patches from you, that
part of the series may want to be resurrected; but I do not think
changing "int" for line numbers to "size_t" is one of such changes.

If you mean "find anything that -Wsign-compare warns about and
squelch the warning by using widest type common among the quantities
involved in the expression the compiler warns about", I do not think
it is a good idea [*1*].


[Reference]

*1* https://staticthinking.wordpress.com/2023/07/25/wsign-compare-is-garbage/

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-24 14:27                 ` Junio C Hamano
@ 2025-02-25  3:24                   ` Zejun Zhao
  2025-02-25 12:53                     ` Junio C Hamano
  0 siblings, 1 reply; 31+ messages in thread
From: Zejun Zhao @ 2025-02-25  3:24 UTC (permalink / raw)
  To: gitster; +Cc: git, jelly.zhao.42, karthik.188, newren, ps

Thank you for explaining. I'll separate the desired changes from this patch to 
another since the topic will be different.

By the way, should we remove the idea from the microproject list now that we 
may not want it?

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [GSOC][PATCH] apply: address -Wsign-comparison warnings
  2025-02-25  3:24                   ` Zejun Zhao
@ 2025-02-25 12:53                     ` Junio C Hamano
  0 siblings, 0 replies; 31+ messages in thread
From: Junio C Hamano @ 2025-02-25 12:53 UTC (permalink / raw)
  To: Zejun Zhao; +Cc: git, karthik.188, newren, ps

Zejun Zhao <jelly.zhao.42@gmail.com> writes:

> Thank you for explaining. I'll separate the desired changes from this patch to 
> another since the topic will be different.
>
> By the way, should we remove the idea from the microproject list now that we 
> may not want it?

It is not like we do not want it, I think.

If you read https://git.github.io/SoC-2025-Microprojects/ (the first
entry) carefully, it does not say "find anything that -Wsign-compare
warns about and squelch the warning by using widest type common
among the quantities involved in the expression the compiler warns
about" at all.  Most specifically, it does *not* tell you to achieve
it "by using widest type".  You would squelch the warning using the
most appropriate type, which may not be the widest one.  In the case
we discussed in this thread, it was line number whose type was the
platform natural integer, but mixed up with other size_t things that
caused the compiler warning.  Blindly using size_t to squelch it may
not be what we want, but that does not mean we do not want to
squelch it by doing something else (like, by not bringing "size_t"
quantity into the picture).

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2025-02-25 12:53 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-05  1:40 [GSOC][PATCH] apply: address -Wsign-comparison warnings Zejun Zhao
2025-02-05  7:47 ` Patrick Steinhardt
2025-02-05 15:42   ` Zejun Zhao
2025-02-05 12:58 ` Junio C Hamano
2025-02-05 16:49   ` Zejun Zhao
2025-02-09  8:12 ` [GSOC][PATCH v2 0/6] " Zejun Zhao
2025-02-09  8:12   ` [GSOC][PATCH v2 1/6] apply: change fields in `apply_state` to unsigned Zejun Zhao
2025-02-13  9:51     ` Karthik Nayak
2025-02-13 18:39       ` Junio C Hamano
2025-02-09  8:12   ` [GSOC][PATCH v2 2/6] apply: change some variables from `int` to `size_t` Zejun Zhao
2025-02-13  6:08     ` Patrick Steinhardt
2025-02-16  7:12       ` [GSOC][PATCH] apply: address -Wsign-comparison warnings Zejun Zhao
2025-02-18 17:49         ` Junio C Hamano
2025-02-21  6:37           ` Zejun Zhao
2025-02-21 17:11             ` Junio C Hamano
2025-02-23 17:36               ` Zejun Zhao
2025-02-24 14:27                 ` Junio C Hamano
2025-02-25  3:24                   ` Zejun Zhao
2025-02-25 12:53                     ` Junio C Hamano
2025-02-09  8:12   ` [GSOC][PATCH v2 3/6] apply: do a typecast to eliminate warnings Zejun Zhao
2025-02-09  8:12   ` [GSOC][PATCH v2 4/6] apply: cast some ptrdiff_t's to size_t's Zejun Zhao
2025-02-09  8:12   ` [GSOC][PATCH v2 5/6] apply: use `size_t` loop counters Zejun Zhao
2025-02-13  6:08     ` Patrick Steinhardt
2025-02-09  8:12   ` [GSOC][PATCH v2 6/6] apply: enable -Wsign-comparison checks Zejun Zhao
2025-02-16  7:28 ` [GSOC][PATCH v3 0/6] apply: address -Wsign-comparison warnings Zejun Zhao
2025-02-16  7:28   ` [PATCH v3 1/6] apply: correct type misuse in `apply.h` Zejun Zhao
2025-02-16  7:28   ` [PATCH v3 2/6] apply: change some variables from `int` to `size_t` Zejun Zhao
2025-02-16  7:28   ` [PATCH v3 3/6] apply: do a typecast to eliminate warnings Zejun Zhao
2025-02-16  7:28   ` [PATCH v3 4/6] apply: cast some ptrdiff_t's to size_t's Zejun Zhao
2025-02-16  7:28   ` [PATCH v3 5/6] apply: use `size_t` loop counters Zejun Zhao
2025-02-16  7:28   ` [PATCH v3 6/6] apply: enable -Wsign-comparison checks Zejun Zhao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).