Git development
 help / color / mirror / Atom feed
* [PATCH 0/4] xdiff: reduce the size of a couple of arrays
@ 2026-04-02 14:57 Phillip Wood
  2026-04-02 14:57 ` [PATCH 1/4] xdiff: reduce size of action arrays Phillip Wood
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Phillip Wood @ 2026-04-02 14:57 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren

From: Phillip Wood <phillip.wood@dunelm.org.uk>

When the myers algorithm is selected the input files are pre-processed
to remove any common prefix and suffix. There are a couple of places
where we allocate arrays large enough to hold the whole file when
they only need to be big enough to hold the remaining lines after the
common prefix and suffix have been removed. This series adjusts those
allocations to avoid allocating space for the common lines.

These patches are based on 'en/xdiff-cleanup-3'

Base-Commit: 7ff1460b62ffc8f18a5478be5aba9d4599afb635
Published-As: https://github.com/phillipwood/git/releases/tag/pw%2Fxdiff-reduce-array-sizes%2Fv1
View-Changes-At: https://github.com/phillipwood/git/compare/7ff1460b6...a3438dc09
Fetch-It-Via: git fetch https://github.com/phillipwood/git pw/xdiff-reduce-array-sizes/v1


Phillip Wood (4):
  xdiff: reduce size of action arrays
  xdiff: cleanup xdl_clean_mmatch()
  xprepare: simplify error handling
  xdiff: reduce the size of array

 xdiff/xprepare.c | 46 ++++++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

-- 
2.52.0.362.g884e03848a9.dirty


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/4] xdiff: reduce size of action arrays
  2026-04-02 14:57 [PATCH 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
@ 2026-04-02 14:57 ` Phillip Wood
  2026-04-02 19:19   ` Junio C Hamano
  2026-04-02 14:57 ` [PATCH 2/4] xdiff: cleanup xdl_clean_mmatch() Phillip Wood
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Phillip Wood @ 2026-04-02 14:57 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren, Phillip Wood

From: Phillip Wood <phillip.wood@dunelm.org.uk>

When the myers algorithm is selected the input files are pre-processed
to remove any common prefix and suffix. Then any lines that appear
only in one side of the diff are marked as changed and frequently
occurring lines are marked as changed if they are adjacent to a
changed line. This step requires a couple of temporary arrays. As as
the common prefix and suffix have already been removed, the arrays
only need to be big enough to hold the lines between them, not the
whole file. Reduce the size of the arrays and adjust the loops that
use them accordingly while taking care to keep indexing the arrays
in xdfile_t with absolute line numbers.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xprepare.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 1f2e8c6b4b9..4bb3a8ef41c 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -273,16 +273,19 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	uint8_t *action1 = NULL, *action2 = NULL;
 	bool need_min = !!(cf->flags & XDF_NEED_MINIMAL);
 	int ret = 0;
+	ptrdiff_t off = xdf1->dstart;
+	ptrdiff_t len1 = xdf1->dend - off + 1;
+	ptrdiff_t len2 = xdf2->dend - off + 1;
 
 	/*
 	 * Create temporary arrays that will help us decide if
 	 * changed[i] should remain false, or become true.
 	 */
-	if (!XDL_CALLOC_ARRAY(action1, xdf1->nrec + 1)) {
+	if (!XDL_CALLOC_ARRAY(action1, len1)) {
 		ret = -1;
 		goto cleanup;
 	}
-	if (!XDL_CALLOC_ARRAY(action2, xdf2->nrec + 1)) {
+	if (!XDL_CALLOC_ARRAY(action2, len2)) {
 		ret = -1;
 		goto cleanup;
 	}
@@ -299,8 +302,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	/*
 	 * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE.
 	 */
-	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
-		size_t mph1 = xdf1->recs[i].minimal_perfect_hash;
+	for (i = 0; i < len1; i++) {
+		size_t mph1 = xdf1->recs[i + off].minimal_perfect_hash;
 		rcrec = cf->rcrecs[mph1];
 		nm = rcrec ? rcrec->len2 : 0;
 		if (nm == 0)
@@ -311,8 +314,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 			action1[i] = INVESTIGATE;
 	}
 
-	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
-		size_t mph2 = xdf2->recs[i].minimal_perfect_hash;
+	for (i = 0; i < len2; i++) {
+		size_t mph2 = xdf2->recs[i + off].minimal_perfect_hash;
 		rcrec = cf->rcrecs[mph2];
 		nm = rcrec ? rcrec->len1 : 0;
 		if (nm == 0)
@@ -328,37 +331,37 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	 * false, or become true.
 	 */
 	xdf1->nreff = 0;
-	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
+	for (i = 0; i < len1; i++) {
 		if (action1[i] == INVESTIGATE) {
-			if (!xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))
+			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))
 				action1[i] = KEEP;
 			else
 				action1[i] = DISCARD;
 		}
 
 		if (action1[i] == KEEP) {
-			xdf1->reference_index[xdf1->nreff++] = i;
+			xdf1->reference_index[xdf1->nreff++] = i + off;
 			/* changed[i] remains false */
 		} else if (action1[i] == DISCARD)
-			xdf1->changed[i] = true;
+			xdf1->changed[i + off] = true;
 		else
 			BUG("Illegal state for action1[i]");
 	}
 
 	xdf2->nreff = 0;
-	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
+	for (i = 0; i < len2; i++) {
 		if (action2[i] == INVESTIGATE) {
-			if (!xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))
+			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
 				action2[i] = KEEP;
 			else
 				action2[i] = DISCARD;
 		}
 
 		if (action2[i] == KEEP) {
-			xdf2->reference_index[xdf2->nreff++] = i;
+			xdf2->reference_index[xdf2->nreff++] = i + off;
 			/* changed[i] remains false */
 		} else if (action2[i] == DISCARD)
-			xdf2->changed[i] = true;
+			xdf2->changed[i + off] = true;
 		else
 			BUG("Illegal state for action2[i]");
 	}
-- 
2.52.0.362.g884e03848a9.dirty


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/4] xdiff: cleanup xdl_clean_mmatch()
  2026-04-02 14:57 [PATCH 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
  2026-04-02 14:57 ` [PATCH 1/4] xdiff: reduce size of action arrays Phillip Wood
@ 2026-04-02 14:57 ` Phillip Wood
  2026-04-02 19:20   ` Junio C Hamano
  2026-04-02 14:57 ` [PATCH 3/4] xprepare: simplify error handling Phillip Wood
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 14+ messages in thread
From: Phillip Wood @ 2026-04-02 14:57 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren, Phillip Wood

From: Phillip Wood <phillip.wood@dunelm.org.uk>

Remove the "s" parameter as, since the last commit, this function
is always called with s == 0. Also change parameter "e" to expect a
length, rather than the index of the last line to simplify the caller.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xprepare.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 4bb3a8ef41c..f8e6a6d74d5 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -197,8 +197,9 @@ void xdl_free_env(xdfenv_t *xe) {
 }
 
 
-static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) {
+static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t len) {
 	ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1;
+	ptrdiff_t s = 0, e = len - 1;
 
 	/*
 	 * Limits the window that is examined during the similar-lines
@@ -333,7 +334,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	xdf1->nreff = 0;
 	for (i = 0; i < len1; i++) {
 		if (action1[i] == INVESTIGATE) {
-			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))
+			if (!xdl_clean_mmatch(action1, i, len1))
 				action1[i] = KEEP;
 			else
 				action1[i] = DISCARD;
@@ -351,7 +352,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	xdf2->nreff = 0;
 	for (i = 0; i < len2; i++) {
 		if (action2[i] == INVESTIGATE) {
-			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
+			if (!xdl_clean_mmatch(action2, i, len2))
 				action2[i] = KEEP;
 			else
 				action2[i] = DISCARD;
-- 
2.52.0.362.g884e03848a9.dirty


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/4] xprepare: simplify error handling
  2026-04-02 14:57 [PATCH 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
  2026-04-02 14:57 ` [PATCH 1/4] xdiff: reduce size of action arrays Phillip Wood
  2026-04-02 14:57 ` [PATCH 2/4] xdiff: cleanup xdl_clean_mmatch() Phillip Wood
@ 2026-04-02 14:57 ` Phillip Wood
  2026-04-02 19:24   ` Junio C Hamano
  2026-04-02 14:57 ` [PATCH 4/4] xdiff: reduce the size of array Phillip Wood
  2026-05-04 14:06 ` [PATCH v2 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
  4 siblings, 1 reply; 14+ messages in thread
From: Phillip Wood @ 2026-04-02 14:57 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren, Phillip Wood

From: Phillip Wood <phillip.wood@dunelm.org.uk>

If either of the two allocations fail we want to take the same action
so use a single if statement. This saves a few lines and makes it
easier for the next commit to add a couple more allocations.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xprepare.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index f8e6a6d74d5..cf4ac34f047 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -282,11 +282,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	 * Create temporary arrays that will help us decide if
 	 * changed[i] should remain false, or become true.
 	 */
-	if (!XDL_CALLOC_ARRAY(action1, len1)) {
-		ret = -1;
-		goto cleanup;
-	}
-	if (!XDL_CALLOC_ARRAY(action2, len2)) {
+	if (!XDL_CALLOC_ARRAY(action1, len1) ||
+	    !XDL_CALLOC_ARRAY(action2, len2)) {
 		ret = -1;
 		goto cleanup;
 	}
-- 
2.52.0.362.g884e03848a9.dirty


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/4] xdiff: reduce the size of array
  2026-04-02 14:57 [PATCH 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
                   ` (2 preceding siblings ...)
  2026-04-02 14:57 ` [PATCH 3/4] xprepare: simplify error handling Phillip Wood
@ 2026-04-02 14:57 ` Phillip Wood
  2026-04-02 19:44   ` Junio C Hamano
  2026-05-04 14:06 ` [PATCH v2 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
  4 siblings, 1 reply; 14+ messages in thread
From: Phillip Wood @ 2026-04-02 14:57 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren, Phillip Wood

From: Phillip Wood <phillip.wood@dunelm.org.uk>

When the myers algorithm is selected the input files are pre-processed
to remove any common prefix and suffix and any lines that appear
in only one file. This requires a map to be created between the
lines that are processed by the myers algorithm and the lines in
the original file. That map does not include the common lines at the
beginning and end of the files but the array is allocated to be the
size of the whole file. Move the allocation into xdl_cleanup_records()
where the map is populated and we know how big it needs to be.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xprepare.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index cf4ac34f047..c5a3c9cde76 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -171,12 +171,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 	if (!XDL_CALLOC_ARRAY(xdf->changed, xdf->nrec + 2))
 		goto abort;
 
-	if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
-	    (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
-		if (!XDL_ALLOC_ARRAY(xdf->reference_index, xdf->nrec + 1))
-			goto abort;
-	}
-
 	xdf->changed += 1;
 	xdf->nreff = 0;
 	xdf->dstart = 0;
@@ -283,7 +277,10 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	 * changed[i] should remain false, or become true.
 	 */
 	if (!XDL_CALLOC_ARRAY(action1, len1) ||
-	    !XDL_CALLOC_ARRAY(action2, len2)) {
+	    !XDL_CALLOC_ARRAY(action2, len2) ||
+	    !XDL_ALLOC_ARRAY(xdf1->reference_index, len1) ||
+	    !XDL_ALLOC_ARRAY(xdf2->reference_index, len2))
+	{
 		ret = -1;
 		goto cleanup;
 	}
-- 
2.52.0.362.g884e03848a9.dirty


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] xdiff: reduce size of action arrays
  2026-04-02 14:57 ` [PATCH 1/4] xdiff: reduce size of action arrays Phillip Wood
@ 2026-04-02 19:19   ` Junio C Hamano
  0 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2026-04-02 19:19 UTC (permalink / raw)
  To: Phillip Wood; +Cc: git, Ezekiel Newren

Phillip Wood <phillip.wood123@gmail.com> writes:

> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>
> When the myers algorithm is selected the input files are pre-processed
> to remove any common prefix and suffix. Then any lines that appear
> only in one side of the diff are marked as changed and frequently
> occurring lines are marked as changed if they are adjacent to a
> changed line. This step requires a couple of temporary arrays. As as
> the common prefix and suffix have already been removed, the arrays
> only need to be big enough to hold the lines between them, not the
> whole file. Reduce the size of the arrays and adjust the loops that
> use them accordingly while taking care to keep indexing the arrays
> in xdfile_t with absolute line numbers.

"As as"???

> Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
> ---
>  xdiff/xprepare.c | 31 +++++++++++++++++--------------
>  1 file changed, 17 insertions(+), 14 deletions(-)
>
> diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
> index 1f2e8c6b4b9..4bb3a8ef41c 100644
> --- a/xdiff/xprepare.c
> +++ b/xdiff/xprepare.c
> @@ -273,16 +273,19 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
>  	uint8_t *action1 = NULL, *action2 = NULL;
>  	bool need_min = !!(cf->flags & XDF_NEED_MINIMAL);
>  	int ret = 0;
> +	ptrdiff_t off = xdf1->dstart;
> +	ptrdiff_t len1 = xdf1->dend - off + 1;
> +	ptrdiff_t len2 = xdf2->dend - off + 1;
>  
>  	/*
>  	 * Create temporary arrays that will help us decide if
>  	 * changed[i] should remain false, or become true.
>  	 */
> -	if (!XDL_CALLOC_ARRAY(action1, xdf1->nrec + 1)) {
> +	if (!XDL_CALLOC_ARRAY(action1, len1)) {
>  		ret = -1;
>  		goto cleanup;
>  	}
> -	if (!XDL_CALLOC_ARRAY(action2, xdf2->nrec + 1)) {
> +	if (!XDL_CALLOC_ARRAY(action2, len2)) {
>  		ret = -1;
>  		goto cleanup;
>  	}

OK, so we used to allocate for the whole thing, but now we only
allocate for lines starting at dstart.  "off" is the difference
between [i], the index into these action arrays, and the true line
numbers.

> @@ -299,8 +302,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
>  	/*
>  	 * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE.
>  	 */
> -	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
> -		size_t mph1 = xdf1->recs[i].minimal_perfect_hash;
> +	for (i = 0; i < len1; i++) {
> +		size_t mph1 = xdf1->recs[i + off].minimal_perfect_hash;

And we iterate as many times as we have entries in the action array,
but we need to offset the [i] with off when looking at the record.

>  		rcrec = cf->rcrecs[mph1];
>  		nm = rcrec ? rcrec->len2 : 0;
>  		if (nm == 0)
> @@ -311,8 +314,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
>  			action1[i] = INVESTIGATE;
>  	}
>  
> -	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
> -		size_t mph2 = xdf2->recs[i].minimal_perfect_hash;
> +	for (i = 0; i < len2; i++) {
> +		size_t mph2 = xdf2->recs[i + off].minimal_perfect_hash;

Likewise.

> @@ -328,37 +331,37 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
>  	 * false, or become true.
>  	 */
>  	xdf1->nreff = 0;
> -	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
> +	for (i = 0; i < len1; i++) {
>  		if (action1[i] == INVESTIGATE) {
> -			if (!xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))
> +			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))

Let me think aloud to see if I can follow the logic here.  Looking
at the implementation of the xdl_clean_mmatch() function, it takes
an action array, an offset 'i' into it (starting from dstart), the
beginning 'dstart' and the end 'dend' offsets.  The idea is that an
index derived from 'i' is used to index the action array and the
beginning and the end offsets are used to limit how far the
access can deviate from 'i'.

Now that we have stripped the first xdf1->dstart elements from the
action1[] array, 'i' in this loop runs from 0 (i.e. one beyond the
initial common section) to len1 - 1 (i.e. one before the common
section at the tail end), i.e., everything is consistently shifted
down by xdf1->dstart in this call.  So xdl_clean_mmatch() does not
even need to know that it is fed a shortened action[] array.


>  				action1[i] = KEEP;
>  			else
>  				action1[i] = DISCARD;
>  		}
>  
>  		if (action1[i] == KEEP) {
> -			xdf1->reference_index[xdf1->nreff++] = i;
> +			xdf1->reference_index[xdf1->nreff++] = i + off;
>  			/* changed[i] remains false */
>  		} else if (action1[i] == DISCARD)
> -			xdf1->changed[i] = true;
> +			xdf1->changed[i + off] = true;

But these two arrays are not shrunk, so we need to compensate by the 'off'
offset.

And the remainder is similar but for xdf2 instead of xdf1 above.

>  		else
>  			BUG("Illegal state for action1[i]");
>  	}
>  
>  	xdf2->nreff = 0;
> -	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
> +	for (i = 0; i < len2; i++) {
>  		if (action2[i] == INVESTIGATE) {
> -			if (!xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))
> +			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
>  				action2[i] = KEEP;
>  			else
>  				action2[i] = DISCARD;
>  		}
>  
>  		if (action2[i] == KEEP) {
> -			xdf2->reference_index[xdf2->nreff++] = i;
> +			xdf2->reference_index[xdf2->nreff++] = i + off;
>  			/* changed[i] remains false */
>  		} else if (action2[i] == DISCARD)
> -			xdf2->changed[i] = true;
> +			xdf2->changed[i + off] = true;
>  		else
>  			BUG("Illegal state for action2[i]");
>  	}

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/4] xdiff: cleanup xdl_clean_mmatch()
  2026-04-02 14:57 ` [PATCH 2/4] xdiff: cleanup xdl_clean_mmatch() Phillip Wood
@ 2026-04-02 19:20   ` Junio C Hamano
  0 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2026-04-02 19:20 UTC (permalink / raw)
  To: Phillip Wood; +Cc: git, Ezekiel Newren

Phillip Wood <phillip.wood123@gmail.com> writes:

> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>
> Remove the "s" parameter as, since the last commit, this function
> is always called with s == 0. Also change parameter "e" to expect a
> length, rather than the index of the last line to simplify the caller.
>
> Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
> ---
>  xdiff/xprepare.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)

Very logical consequence, given what the previous step did.  Makes sense.

>
> diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
> index 4bb3a8ef41c..f8e6a6d74d5 100644
> --- a/xdiff/xprepare.c
> +++ b/xdiff/xprepare.c
> @@ -197,8 +197,9 @@ void xdl_free_env(xdfenv_t *xe) {
>  }
>  
>  
> -static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) {
> +static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t len) {
>  	ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1;
> +	ptrdiff_t s = 0, e = len - 1;
>  
>  	/*
>  	 * Limits the window that is examined during the similar-lines
> @@ -333,7 +334,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
>  	xdf1->nreff = 0;
>  	for (i = 0; i < len1; i++) {
>  		if (action1[i] == INVESTIGATE) {
> -			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))
> +			if (!xdl_clean_mmatch(action1, i, len1))
>  				action1[i] = KEEP;
>  			else
>  				action1[i] = DISCARD;
> @@ -351,7 +352,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
>  	xdf2->nreff = 0;
>  	for (i = 0; i < len2; i++) {
>  		if (action2[i] == INVESTIGATE) {
> -			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
> +			if (!xdl_clean_mmatch(action2, i, len2))
>  				action2[i] = KEEP;
>  			else
>  				action2[i] = DISCARD;

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/4] xprepare: simplify error handling
  2026-04-02 14:57 ` [PATCH 3/4] xprepare: simplify error handling Phillip Wood
@ 2026-04-02 19:24   ` Junio C Hamano
  0 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2026-04-02 19:24 UTC (permalink / raw)
  To: Phillip Wood; +Cc: git, Ezekiel Newren

Phillip Wood <phillip.wood123@gmail.com> writes:

> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>
> If either of the two allocations fail we want to take the same action
> so use a single if statement. This saves a few lines and makes it
> easier for the next commit to add a couple more allocations.
>
> Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
> ---
>  xdiff/xprepare.c | 7 ++-----
>  1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
> index f8e6a6d74d5..cf4ac34f047 100644
> --- a/xdiff/xprepare.c
> +++ b/xdiff/xprepare.c
> @@ -282,11 +282,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
>  	 * Create temporary arrays that will help us decide if
>  	 * changed[i] should remain false, or become true.
>  	 */
> -	if (!XDL_CALLOC_ARRAY(action1, len1)) {
> -		ret = -1;
> -		goto cleanup;
> -	}
> -	if (!XDL_CALLOC_ARRAY(action2, len2)) {
> +	if (!XDL_CALLOC_ARRAY(action1, len1) ||
> +	    !XDL_CALLOC_ARRAY(action2, len2)) {
>  		ret = -1;
>  		goto cleanup;
>  	}

If the original were "after successfully allocating action1[], if
allocation of action2[] fails, then release action1[] before
returning -1", written in place, it would have been a different
story, but the "cleanup:" label is left to free each and every
resource the code obtains in this function, so this consolidation
does make sense.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/4] xdiff: reduce the size of array
  2026-04-02 14:57 ` [PATCH 4/4] xdiff: reduce the size of array Phillip Wood
@ 2026-04-02 19:44   ` Junio C Hamano
  0 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2026-04-02 19:44 UTC (permalink / raw)
  To: Phillip Wood; +Cc: git, Ezekiel Newren

Phillip Wood <phillip.wood123@gmail.com> writes:

> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>
> When the myers algorithm is selected the input files are pre-processed
> to remove any common prefix and suffix and any lines that appear
> in only one file. This requires a map to be created between the
> lines that are processed by the myers algorithm and the lines in
> the original file. That map does not include the common lines at the
> beginning and end of the files but the array is allocated to be the
> size of the whole file. Move the allocation into xdl_cleanup_records()
> where the map is populated and we know how big it needs to be.
>
> Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
> ---
>  xdiff/xprepare.c | 11 ++++-------
>  1 file changed, 4 insertions(+), 7 deletions(-)
>
> diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
> index cf4ac34f047..c5a3c9cde76 100644
> --- a/xdiff/xprepare.c
> +++ b/xdiff/xprepare.c
> @@ -171,12 +171,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
>  	if (!XDL_CALLOC_ARRAY(xdf->changed, xdf->nrec + 2))
>  		goto abort;
>  
> -	if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
> -	    (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
> -		if (!XDL_ALLOC_ARRAY(xdf->reference_index, xdf->nrec + 1))
> -			goto abort;
> -	}
> -
>  	xdf->changed += 1;
>  	xdf->nreff = 0;
>  	xdf->dstart = 0;
> @@ -283,7 +277,10 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
>  	 * changed[i] should remain false, or become true.
>  	 */
>  	if (!XDL_CALLOC_ARRAY(action1, len1) ||
> -	    !XDL_CALLOC_ARRAY(action2, len2)) {
> +	    !XDL_CALLOC_ARRAY(action2, len2) ||
> +	    !XDL_ALLOC_ARRAY(xdf1->reference_index, len1) ||
> +	    !XDL_ALLOC_ARRAY(xdf2->reference_index, len2))
> +	{
>  		ret = -1;
>  		goto cleanup;
>  	}

OK.  In xdl_cleanup_records(), accesses to xdf{1,2}->reference_index[]
already run from index 0 (i.e., array element at [0] corresponds to
the xdf1->dstart) even without the previous three patches.  So we
were only wasting the elements near the end in these two arrays.
And the loop that uses the array runs only for len1 times, and the
array may acquire at most one new element per iteration, so len1 is
the reasonable allocation size for xdf1->reference_index[].

Looking good.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 0/4] xdiff: reduce the size of a couple of arrays
  2026-04-02 14:57 [PATCH 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
                   ` (3 preceding siblings ...)
  2026-04-02 14:57 ` [PATCH 4/4] xdiff: reduce the size of array Phillip Wood
@ 2026-05-04 14:06 ` Phillip Wood
  2026-05-04 14:06   ` [PATCH v2 1/4] xdiff: reduce size of action arrays Phillip Wood
                     ` (3 more replies)
  4 siblings, 4 replies; 14+ messages in thread
From: Phillip Wood @ 2026-05-04 14:06 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren, Junio C Hamano, Phillip Wood

When the myers algorithm is selected the input files are pre-processed
to remove any common prefix and suffix. There are a couple of places
where we allocate arrays large enough to hold the whole file when
they only need to be big enough to hold the remaining lines after the
common prefix and suffix have been removed. This series adjusts those
allocations to avoid allocating space for the common lines.

These patches are based on 'en/xdiff-cleanup-3'

Changes since V1:
 - rebased onto updated upstream

Base-Commit: f87808b7014cf06db4a7e19b193cf9aa7e965ebc
Published-As: https://github.com/phillipwood/git/releases/tag/pw%2Fxdiff-reduce-array-sizes%2Fv2
View-Changes-At: https://github.com/phillipwood/git/compare/f87808b70...d7cb49a7c
Fetch-It-Via: git fetch https://github.com/phillipwood/git pw/xdiff-reduce-array-sizes/v2


Phillip Wood (4):
  xdiff: reduce size of action arrays
  xdiff: cleanup xdl_clean_mmatch()
  xprepare: simplify error handling
  xdiff: reduce the size of array

 xdiff/xprepare.c | 46 ++++++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

Range-diff against v1:
1:  447b8c0af17 ! 1:  ec692cabfec xdiff: reduce size of action arrays
    @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *
      		goto cleanup;
      	}
     @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
    - 	/*
    - 	 * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE.
    - 	 */
    + 		if (mlim1 > XDL_MAX_EQLIMIT)
    + 			mlim1 = XDL_MAX_EQLIMIT;
    + 	}
     -	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
     -		size_t mph1 = xdf1->recs[i].minimal_perfect_hash;
     +	for (i = 0; i < len1; i++) {
    @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *
      		nm = rcrec ? rcrec->len2 : 0;
      		if (nm == 0)
     @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
    - 			action1[i] = INVESTIGATE;
    + 		if (mlim2 > XDL_MAX_EQLIMIT)
    + 			mlim2 = XDL_MAX_EQLIMIT;
      	}
    - 
     -	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
     -		size_t mph2 = xdf2->recs[i].minimal_perfect_hash;
     +	for (i = 0; i < len2; i++) {
    @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *
      	xdf1->nreff = 0;
     -	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
     +	for (i = 0; i < len1; i++) {
    - 		if (action1[i] == INVESTIGATE) {
    + 		uint8_t action = action1[i];
    + 
    + 		if (action == INVESTIGATE) {
     -			if (!xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))
     +			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))
    - 				action1[i] = KEEP;
    + 				action = KEEP;
      			else
    - 				action1[i] = DISCARD;
    + 				action = DISCARD;
      		}
      
    - 		if (action1[i] == KEEP) {
    + 		if (action == KEEP) {
     -			xdf1->reference_index[xdf1->nreff++] = i;
     +			xdf1->reference_index[xdf1->nreff++] = i + off;
      			/* changed[i] remains false */
    - 		} else if (action1[i] == DISCARD)
    + 		} else if (action == DISCARD) {
     -			xdf1->changed[i] = true;
     +			xdf1->changed[i + off] = true;
    - 		else
    - 			BUG("Illegal state for action1[i]");
    + 		} else {
    + 			BUG("Illegal state for action");
    + 		}
      	}
      
      	xdf2->nreff = 0;
     -	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
     +	for (i = 0; i < len2; i++) {
    - 		if (action2[i] == INVESTIGATE) {
    + 		uint8_t action = action2[i];
    + 
    + 		if (action == INVESTIGATE) {
     -			if (!xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))
     +			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
    - 				action2[i] = KEEP;
    + 				action = KEEP;
      			else
    - 				action2[i] = DISCARD;
    + 				action = DISCARD;
      		}
      
    - 		if (action2[i] == KEEP) {
    + 		if (action == KEEP) {
     -			xdf2->reference_index[xdf2->nreff++] = i;
     +			xdf2->reference_index[xdf2->nreff++] = i + off;
      			/* changed[i] remains false */
    - 		} else if (action2[i] == DISCARD)
    + 		} else if (action == DISCARD) {
     -			xdf2->changed[i] = true;
     +			xdf2->changed[i + off] = true;
    - 		else
    - 			BUG("Illegal state for action2[i]");
    - 	}
    + 		} else {
    + 			BUG("Illegal state for action");
    + 		}
2:  78e9313fd44 ! 2:  977f4577521 xdiff: cleanup xdl_clean_mmatch()
    @@ xdiff/xprepare.c: void xdl_free_env(xdfenv_t *xe) {
      	/*
      	 * Limits the window that is examined during the similar-lines
     @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
    - 	xdf1->nreff = 0;
    - 	for (i = 0; i < len1; i++) {
    - 		if (action1[i] == INVESTIGATE) {
    + 		uint8_t action = action1[i];
    + 
    + 		if (action == INVESTIGATE) {
     -			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))
     +			if (!xdl_clean_mmatch(action1, i, len1))
    - 				action1[i] = KEEP;
    + 				action = KEEP;
      			else
    - 				action1[i] = DISCARD;
    + 				action = DISCARD;
     @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
    - 	xdf2->nreff = 0;
    - 	for (i = 0; i < len2; i++) {
    - 		if (action2[i] == INVESTIGATE) {
    + 		uint8_t action = action2[i];
    + 
    + 		if (action == INVESTIGATE) {
     -			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
     +			if (!xdl_clean_mmatch(action2, i, len2))
    - 				action2[i] = KEEP;
    + 				action = KEEP;
      			else
    - 				action2[i] = DISCARD;
    + 				action = DISCARD;
3:  cdcad99edc4 = 3:  24e65d42b72 xprepare: simplify error handling
4:  a3438dc0933 = 4:  d7cb49a7c99 xdiff: reduce the size of array
-- 
2.54.0.rc1.174.gd833f386ac5.dirty


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2 1/4] xdiff: reduce size of action arrays
  2026-05-04 14:06 ` [PATCH v2 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
@ 2026-05-04 14:06   ` Phillip Wood
  2026-05-04 14:06   ` [PATCH v2 2/4] xdiff: cleanup xdl_clean_mmatch() Phillip Wood
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 14+ messages in thread
From: Phillip Wood @ 2026-05-04 14:06 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren, Junio C Hamano, Phillip Wood

From: Phillip Wood <phillip.wood@dunelm.org.uk>

When the myers algorithm is selected the input files are pre-processed
to remove any common prefix and suffix. Then any lines that appear
only in one side of the diff are marked as changed and frequently
occurring lines are marked as changed if they are adjacent to a
changed line. This step requires a couple of temporary arrays. As
the common prefix and suffix have already been removed, the arrays
only need to be big enough to hold the lines between them, not the
whole file. Reduce the size of the arrays and adjust the loops that
use them accordingly while taking care to keep indexing the arrays
in xdfile_t with absolute line numbers.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xprepare.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index beef711067b..3b6bae0d158 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -273,16 +273,19 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	uint8_t *action1 = NULL, *action2 = NULL;
 	bool need_min = !!(cf->flags & XDF_NEED_MINIMAL);
 	int ret = 0;
+	ptrdiff_t off = xdf1->dstart;
+	ptrdiff_t len1 = xdf1->dend - off + 1;
+	ptrdiff_t len2 = xdf2->dend - off + 1;
 
 	/*
 	 * Create temporary arrays that will help us decide if
 	 * changed[i] should remain false, or become true.
 	 */
-	if (!XDL_CALLOC_ARRAY(action1, xdf1->nrec + 1)) {
+	if (!XDL_CALLOC_ARRAY(action1, len1)) {
 		ret = -1;
 		goto cleanup;
 	}
-	if (!XDL_CALLOC_ARRAY(action2, xdf2->nrec + 1)) {
+	if (!XDL_CALLOC_ARRAY(action2, len2)) {
 		ret = -1;
 		goto cleanup;
 	}
@@ -298,8 +301,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 		if (mlim1 > XDL_MAX_EQLIMIT)
 			mlim1 = XDL_MAX_EQLIMIT;
 	}
-	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
-		size_t mph1 = xdf1->recs[i].minimal_perfect_hash;
+	for (i = 0; i < len1; i++) {
+		size_t mph1 = xdf1->recs[i + off].minimal_perfect_hash;
 		rcrec = cf->rcrecs[mph1];
 		nm = rcrec ? rcrec->len2 : 0;
 		if (nm == 0)
@@ -318,8 +321,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 		if (mlim2 > XDL_MAX_EQLIMIT)
 			mlim2 = XDL_MAX_EQLIMIT;
 	}
-	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
-		size_t mph2 = xdf2->recs[i].minimal_perfect_hash;
+	for (i = 0; i < len2; i++) {
+		size_t mph2 = xdf2->recs[i + off].minimal_perfect_hash;
 		rcrec = cf->rcrecs[mph2];
 		nm = rcrec ? rcrec->len1 : 0;
 		if (nm == 0)
@@ -335,42 +338,42 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	 * false, or become true.
 	 */
 	xdf1->nreff = 0;
-	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
+	for (i = 0; i < len1; i++) {
 		uint8_t action = action1[i];
 
 		if (action == INVESTIGATE) {
-			if (!xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))
+			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))
 				action = KEEP;
 			else
 				action = DISCARD;
 		}
 
 		if (action == KEEP) {
-			xdf1->reference_index[xdf1->nreff++] = i;
+			xdf1->reference_index[xdf1->nreff++] = i + off;
 			/* changed[i] remains false */
 		} else if (action == DISCARD) {
-			xdf1->changed[i] = true;
+			xdf1->changed[i + off] = true;
 		} else {
 			BUG("Illegal state for action");
 		}
 	}
 
 	xdf2->nreff = 0;
-	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
+	for (i = 0; i < len2; i++) {
 		uint8_t action = action2[i];
 
 		if (action == INVESTIGATE) {
-			if (!xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))
+			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
 				action = KEEP;
 			else
 				action = DISCARD;
 		}
 
 		if (action == KEEP) {
-			xdf2->reference_index[xdf2->nreff++] = i;
+			xdf2->reference_index[xdf2->nreff++] = i + off;
 			/* changed[i] remains false */
 		} else if (action == DISCARD) {
-			xdf2->changed[i] = true;
+			xdf2->changed[i + off] = true;
 		} else {
 			BUG("Illegal state for action");
 		}
-- 
2.54.0.rc1.174.gd833f386ac5.dirty


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 2/4] xdiff: cleanup xdl_clean_mmatch()
  2026-05-04 14:06 ` [PATCH v2 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
  2026-05-04 14:06   ` [PATCH v2 1/4] xdiff: reduce size of action arrays Phillip Wood
@ 2026-05-04 14:06   ` Phillip Wood
  2026-05-04 14:06   ` [PATCH v2 3/4] xprepare: simplify error handling Phillip Wood
  2026-05-04 14:06   ` [PATCH v2 4/4] xdiff: reduce the size of array Phillip Wood
  3 siblings, 0 replies; 14+ messages in thread
From: Phillip Wood @ 2026-05-04 14:06 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren, Junio C Hamano, Phillip Wood

From: Phillip Wood <phillip.wood@dunelm.org.uk>

Remove the "s" parameter as, since the last commit, this function
is always called with s == 0. Also change parameter "e" to expect a
length, rather than the index of the last line to simplify the caller.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xprepare.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 3b6bae0d158..81de412875a 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -197,8 +197,9 @@ void xdl_free_env(xdfenv_t *xe) {
 }
 
 
-static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) {
+static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t len) {
 	ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1;
+	ptrdiff_t s = 0, e = len - 1;
 
 	/*
 	 * Limits the window that is examined during the similar-lines
@@ -342,7 +343,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 		uint8_t action = action1[i];
 
 		if (action == INVESTIGATE) {
-			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))
+			if (!xdl_clean_mmatch(action1, i, len1))
 				action = KEEP;
 			else
 				action = DISCARD;
@@ -363,7 +364,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 		uint8_t action = action2[i];
 
 		if (action == INVESTIGATE) {
-			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
+			if (!xdl_clean_mmatch(action2, i, len2))
 				action = KEEP;
 			else
 				action = DISCARD;
-- 
2.54.0.rc1.174.gd833f386ac5.dirty


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 3/4] xprepare: simplify error handling
  2026-05-04 14:06 ` [PATCH v2 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
  2026-05-04 14:06   ` [PATCH v2 1/4] xdiff: reduce size of action arrays Phillip Wood
  2026-05-04 14:06   ` [PATCH v2 2/4] xdiff: cleanup xdl_clean_mmatch() Phillip Wood
@ 2026-05-04 14:06   ` Phillip Wood
  2026-05-04 14:06   ` [PATCH v2 4/4] xdiff: reduce the size of array Phillip Wood
  3 siblings, 0 replies; 14+ messages in thread
From: Phillip Wood @ 2026-05-04 14:06 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren, Junio C Hamano, Phillip Wood

From: Phillip Wood <phillip.wood@dunelm.org.uk>

If either of the two allocations fails, we want to take the same action
so use a single if statement. This saves a few lines and makes it
easier for the next commit to add a couple more allocations.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xprepare.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 81de412875a..7a29e5fc474 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -282,11 +282,8 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	 * Create temporary arrays that will help us decide if
 	 * changed[i] should remain false, or become true.
 	 */
-	if (!XDL_CALLOC_ARRAY(action1, len1)) {
-		ret = -1;
-		goto cleanup;
-	}
-	if (!XDL_CALLOC_ARRAY(action2, len2)) {
+	if (!XDL_CALLOC_ARRAY(action1, len1) ||
+	    !XDL_CALLOC_ARRAY(action2, len2)) {
 		ret = -1;
 		goto cleanup;
 	}
-- 
2.54.0.rc1.174.gd833f386ac5.dirty


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v2 4/4] xdiff: reduce the size of array
  2026-05-04 14:06 ` [PATCH v2 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
                     ` (2 preceding siblings ...)
  2026-05-04 14:06   ` [PATCH v2 3/4] xprepare: simplify error handling Phillip Wood
@ 2026-05-04 14:06   ` Phillip Wood
  3 siblings, 0 replies; 14+ messages in thread
From: Phillip Wood @ 2026-05-04 14:06 UTC (permalink / raw)
  To: git; +Cc: Ezekiel Newren, Junio C Hamano, Phillip Wood

From: Phillip Wood <phillip.wood@dunelm.org.uk>

When the myers algorithm is selected the input files are pre-processed
to remove any common prefix and suffix and any lines that appear
in only one file. This requires a map to be created between the
lines that are processed by the myers algorithm and the lines in
the original file. That map does not include the common lines at the
beginning and end of the files but the array is allocated to be the
size of the whole file. Move the allocation into xdl_cleanup_records()
where the map is populated and we know how big it needs to be.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 xdiff/xprepare.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 7a29e5fc474..11bada2608a 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -170,12 +170,6 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 
 	if (!XDL_CALLOC_ARRAY(xdf->changed, xdf->nrec + 2))
 		goto abort;
-
-	if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
-	    (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
-		if (!XDL_ALLOC_ARRAY(xdf->reference_index, xdf->nrec + 1))
-			goto abort;
-	}
 
 	xdf->changed += 1;
 	xdf->nreff = 0;
@@ -283,7 +277,10 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	 * changed[i] should remain false, or become true.
 	 */
 	if (!XDL_CALLOC_ARRAY(action1, len1) ||
-	    !XDL_CALLOC_ARRAY(action2, len2)) {
+	    !XDL_CALLOC_ARRAY(action2, len2) ||
+	    !XDL_ALLOC_ARRAY(xdf1->reference_index, len1) ||
+	    !XDL_ALLOC_ARRAY(xdf2->reference_index, len2))
+	{
 		ret = -1;
 		goto cleanup;
 	}
-- 
2.54.0.rc1.174.gd833f386ac5.dirty


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2026-05-04 14:06 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-02 14:57 [PATCH 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
2026-04-02 14:57 ` [PATCH 1/4] xdiff: reduce size of action arrays Phillip Wood
2026-04-02 19:19   ` Junio C Hamano
2026-04-02 14:57 ` [PATCH 2/4] xdiff: cleanup xdl_clean_mmatch() Phillip Wood
2026-04-02 19:20   ` Junio C Hamano
2026-04-02 14:57 ` [PATCH 3/4] xprepare: simplify error handling Phillip Wood
2026-04-02 19:24   ` Junio C Hamano
2026-04-02 14:57 ` [PATCH 4/4] xdiff: reduce the size of array Phillip Wood
2026-04-02 19:44   ` Junio C Hamano
2026-05-04 14:06 ` [PATCH v2 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
2026-05-04 14:06   ` [PATCH v2 1/4] xdiff: reduce size of action arrays Phillip Wood
2026-05-04 14:06   ` [PATCH v2 2/4] xdiff: cleanup xdl_clean_mmatch() Phillip Wood
2026-05-04 14:06   ` [PATCH v2 3/4] xprepare: simplify error handling Phillip Wood
2026-05-04 14:06   ` [PATCH v2 4/4] xdiff: reduce the size of array Phillip Wood

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox