Git development
 help / color / mirror / Atom feed
From: Phillip Wood <phillip.wood123@gmail.com>
To: git@vger.kernel.org
Cc: Ezekiel Newren <ezekielnewren@gmail.com>,
	Junio C Hamano <gitster@pobox.com>,
	Phillip Wood <phillip.wood123@gmail.com>
Subject: [PATCH v2 0/4] xdiff: reduce the size of a couple of arrays
Date: Mon,  4 May 2026 15:06:17 +0100	[thread overview]
Message-ID: <cover.1777903579.git.phillip.wood@dunelm.org.uk> (raw)
In-Reply-To: <cover.1775141855.git.phillip.wood@dunelm.org.uk>

When the myers algorithm is selected the input files are pre-processed
to remove any common prefix and suffix. There are a couple of places
where we allocate arrays large enough to hold the whole file when
they only need to be big enough to hold the remaining lines after the
common prefix and suffix have been removed. This series adjusts those
allocations to avoid allocating space for the common lines.

These patches are based on 'en/xdiff-cleanup-3'

Changes since V1:
 - rebased onto updated upstream

Base-Commit: f87808b7014cf06db4a7e19b193cf9aa7e965ebc
Published-As: https://github.com/phillipwood/git/releases/tag/pw%2Fxdiff-reduce-array-sizes%2Fv2
View-Changes-At: https://github.com/phillipwood/git/compare/f87808b70...d7cb49a7c
Fetch-It-Via: git fetch https://github.com/phillipwood/git pw/xdiff-reduce-array-sizes/v2


Phillip Wood (4):
  xdiff: reduce size of action arrays
  xdiff: cleanup xdl_clean_mmatch()
  xprepare: simplify error handling
  xdiff: reduce the size of array

 xdiff/xprepare.c | 46 ++++++++++++++++++++++------------------------
 1 file changed, 22 insertions(+), 24 deletions(-)

Range-diff against v1:
1:  447b8c0af17 ! 1:  ec692cabfec xdiff: reduce size of action arrays
    @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *
      		goto cleanup;
      	}
     @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
    - 	/*
    - 	 * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE.
    - 	 */
    + 		if (mlim1 > XDL_MAX_EQLIMIT)
    + 			mlim1 = XDL_MAX_EQLIMIT;
    + 	}
     -	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
     -		size_t mph1 = xdf1->recs[i].minimal_perfect_hash;
     +	for (i = 0; i < len1; i++) {
    @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *
      		nm = rcrec ? rcrec->len2 : 0;
      		if (nm == 0)
     @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
    - 			action1[i] = INVESTIGATE;
    + 		if (mlim2 > XDL_MAX_EQLIMIT)
    + 			mlim2 = XDL_MAX_EQLIMIT;
      	}
    - 
     -	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
     -		size_t mph2 = xdf2->recs[i].minimal_perfect_hash;
     +	for (i = 0; i < len2; i++) {
    @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *
      	xdf1->nreff = 0;
     -	for (i = xdf1->dstart; i <= xdf1->dend; i++) {
     +	for (i = 0; i < len1; i++) {
    - 		if (action1[i] == INVESTIGATE) {
    + 		uint8_t action = action1[i];
    + 
    + 		if (action == INVESTIGATE) {
     -			if (!xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))
     +			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))
    - 				action1[i] = KEEP;
    + 				action = KEEP;
      			else
    - 				action1[i] = DISCARD;
    + 				action = DISCARD;
      		}
      
    - 		if (action1[i] == KEEP) {
    + 		if (action == KEEP) {
     -			xdf1->reference_index[xdf1->nreff++] = i;
     +			xdf1->reference_index[xdf1->nreff++] = i + off;
      			/* changed[i] remains false */
    - 		} else if (action1[i] == DISCARD)
    + 		} else if (action == DISCARD) {
     -			xdf1->changed[i] = true;
     +			xdf1->changed[i + off] = true;
    - 		else
    - 			BUG("Illegal state for action1[i]");
    + 		} else {
    + 			BUG("Illegal state for action");
    + 		}
      	}
      
      	xdf2->nreff = 0;
     -	for (i = xdf2->dstart; i <= xdf2->dend; i++) {
     +	for (i = 0; i < len2; i++) {
    - 		if (action2[i] == INVESTIGATE) {
    + 		uint8_t action = action2[i];
    + 
    + 		if (action == INVESTIGATE) {
     -			if (!xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))
     +			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
    - 				action2[i] = KEEP;
    + 				action = KEEP;
      			else
    - 				action2[i] = DISCARD;
    + 				action = DISCARD;
      		}
      
    - 		if (action2[i] == KEEP) {
    + 		if (action == KEEP) {
     -			xdf2->reference_index[xdf2->nreff++] = i;
     +			xdf2->reference_index[xdf2->nreff++] = i + off;
      			/* changed[i] remains false */
    - 		} else if (action2[i] == DISCARD)
    + 		} else if (action == DISCARD) {
     -			xdf2->changed[i] = true;
     +			xdf2->changed[i + off] = true;
    - 		else
    - 			BUG("Illegal state for action2[i]");
    - 	}
    + 		} else {
    + 			BUG("Illegal state for action");
    + 		}
2:  78e9313fd44 ! 2:  977f4577521 xdiff: cleanup xdl_clean_mmatch()
    @@ xdiff/xprepare.c: void xdl_free_env(xdfenv_t *xe) {
      	/*
      	 * Limits the window that is examined during the similar-lines
     @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
    - 	xdf1->nreff = 0;
    - 	for (i = 0; i < len1; i++) {
    - 		if (action1[i] == INVESTIGATE) {
    + 		uint8_t action = action1[i];
    + 
    + 		if (action == INVESTIGATE) {
     -			if (!xdl_clean_mmatch(action1, i, 0, len1 - 1))
     +			if (!xdl_clean_mmatch(action1, i, len1))
    - 				action1[i] = KEEP;
    + 				action = KEEP;
      			else
    - 				action1[i] = DISCARD;
    + 				action = DISCARD;
     @@ xdiff/xprepare.c: static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
    - 	xdf2->nreff = 0;
    - 	for (i = 0; i < len2; i++) {
    - 		if (action2[i] == INVESTIGATE) {
    + 		uint8_t action = action2[i];
    + 
    + 		if (action == INVESTIGATE) {
     -			if (!xdl_clean_mmatch(action2, i, 0, len2 - 1))
     +			if (!xdl_clean_mmatch(action2, i, len2))
    - 				action2[i] = KEEP;
    + 				action = KEEP;
      			else
    - 				action2[i] = DISCARD;
    + 				action = DISCARD;
3:  cdcad99edc4 = 3:  24e65d42b72 xprepare: simplify error handling
4:  a3438dc0933 = 4:  d7cb49a7c99 xdiff: reduce the size of array
-- 
2.54.0.rc1.174.gd833f386ac5.dirty


  parent reply	other threads:[~2026-05-04 14:06 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-02 14:57 [PATCH 0/4] xdiff: reduce the size of a couple of arrays Phillip Wood
2026-04-02 14:57 ` [PATCH 1/4] xdiff: reduce size of action arrays Phillip Wood
2026-04-02 19:19   ` Junio C Hamano
2026-04-02 14:57 ` [PATCH 2/4] xdiff: cleanup xdl_clean_mmatch() Phillip Wood
2026-04-02 19:20   ` Junio C Hamano
2026-04-02 14:57 ` [PATCH 3/4] xprepare: simplify error handling Phillip Wood
2026-04-02 19:24   ` Junio C Hamano
2026-04-02 14:57 ` [PATCH 4/4] xdiff: reduce the size of array Phillip Wood
2026-04-02 19:44   ` Junio C Hamano
2026-05-04 14:06 ` Phillip Wood [this message]
2026-05-04 14:06   ` [PATCH v2 1/4] xdiff: reduce size of action arrays Phillip Wood
2026-05-04 14:06   ` [PATCH v2 2/4] xdiff: cleanup xdl_clean_mmatch() Phillip Wood
2026-05-04 14:06   ` [PATCH v2 3/4] xprepare: simplify error handling Phillip Wood
2026-05-04 14:06   ` [PATCH v2 4/4] xdiff: reduce the size of array Phillip Wood

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cover.1777903579.git.phillip.wood@dunelm.org.uk \
    --to=phillip.wood123@gmail.com \
    --cc=ezekielnewren@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=phillip.wood@dunelm.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox