Git development
 help / color / mirror / Atom feed
From: "Michael Montalbo via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Michael Montalbo <mmontalbo@gmail.com>,
	Michael Montalbo <mmontalbo@gmail.com>
Subject: [PATCH v2 1/4] xdiff: support external hunks via xpparam_t
Date: Mon, 25 May 2026 18:29:55 +0000	[thread overview]
Message-ID: <f887a7e2ba697b6a2aa77319ed92cf4009f46871.1779733799.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2120.v2.git.1779733799.gitgitgadget@gmail.com>

From: Michael Montalbo <mmontalbo@gmail.com>

Add two new xpparam_t fields (external_hunks, external_hunks_nr)
that let callers supply pre-computed hunks.  When set, xdl_diff()
populates the changed[] arrays from these hunks instead of running
the diff algorithm, then continues through compaction and emission
as usual.

Validate supplied hunks before use: reject out-of-bounds line
numbers, overlapping or out-of-order hunks, negative counts, and
violations of the synchronization invariant (unchanged line counts
must match between files).  On validation failure, fall back to
the builtin diff algorithm.

Skip trim_common_tail() in xdi_diff() when external hunks are
present, since external hunks reference line numbers in the
original content.

Signed-off-by: Michael Montalbo <mmontalbo@gmail.com>
---
 xdiff-interface.c |  7 +++-
 xdiff/xdiff.h     | 13 ++++++++
 xdiff/xdiffi.c    | 84 +++++++++++++++++++++++++++++++++++++++++++++--
 xdiff/xprepare.c  | 10 ++++++
 xdiff/xprepare.h  |  1 +
 5 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/xdiff-interface.c b/xdiff-interface.c
index f043330f2a..9542c0bcc2 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -124,7 +124,12 @@ int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t co
 	if (mf1->size > MAX_XDIFF_SIZE || mf2->size > MAX_XDIFF_SIZE)
 		return -1;
 
-	if (!xecfg->ctxlen && !(xecfg->flags & XDL_EMIT_FUNCCONTEXT))
+	/*
+	 * External hunks reference line numbers in the original content;
+	 * trimming the tail would change line counts and invalidate them.
+	 */
+	if (!xpp->external_hunks &&
+	    !xecfg->ctxlen && !(xecfg->flags & XDL_EMIT_FUNCCONTEXT))
 		trim_common_tail(&a, &b);
 
 	return xdl_diff(&a, &b, xpp, xecfg, xecb);
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index dc370712e9..2ee6f1aae3 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -78,6 +78,15 @@ typedef struct s_mmbuffer {
 	long size;
 } mmbuffer_t;
 
+/*
+ * Hunk descriptor for externally computed diffs.  Line numbers are
+ * 1-based as in unified diff, but a zero-count side needs start >= 1.
+ */
+struct xdl_hunk {
+	long old_start, old_count;
+	long new_start, new_count;
+};
+
 typedef struct s_xpparam {
 	unsigned long flags;
 
@@ -88,6 +97,10 @@ typedef struct s_xpparam {
 	/* See Documentation/diff-options.adoc. */
 	char **anchors;
 	size_t anchors_nr;
+
+	/* Externally computed hunks: bypass the diff algorithm. */
+	const struct xdl_hunk *external_hunks;
+	size_t external_hunks_nr;
 } xpparam_t;
 
 typedef struct s_xdemitcb {
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 5455b4690d..e7d6190d37 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -1085,16 +1085,96 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
 	}
 }
 
+/*
+ * Populate the changed[] arrays from externally supplied hunks,
+ * bypassing the diff algorithm.  Hunks must use non-negative counts
+ * and 1-based in-bounds starts, be in order without overlap, and
+ * leave the same number of unchanged lines in both files.
+ *
+ * Returns 0 on success, -1 on validation failure.  On failure the
+ * changed[] arrays may be partially marked, so the caller must
+ * discard or re-prepare xe before using it.
+ */
+static int xdl_populate_hunks_from_external(xdfenv_t *xe,
+					    const struct xdl_hunk *hunks,
+					    size_t nr_hunks)
+{
+	size_t i;
+	long j, prev_old_end = 0, prev_new_end = 0;
+	long total_old = 0, total_new = 0;
+	long nrec1 = (long)xe->xdf1.nrec, nrec2 = (long)xe->xdf2.nrec;
+
+	xdl_clear_changed(&xe->xdf1);
+	xdl_clear_changed(&xe->xdf2);
+
+	for (i = 0; i < nr_hunks; i++) {
+		const struct xdl_hunk *h = &hunks[i];
+
+		if (h->old_count < 0 || h->new_count < 0)
+			return -1;
+
+		/*
+		 * Bounds check (1-based line numbers).  A start may be
+		 * at most nrec + 1, which only an empty range can use.
+		 * Compare the count against the remaining room instead
+		 * of computing start + count, which could overflow for
+		 * very large values and slip past the check.
+		 */
+		if (h->old_start < 1 || h->old_start > nrec1 + 1 ||
+		    h->old_count > nrec1 + 1 - h->old_start)
+			return -1;
+		if (h->new_start < 1 || h->new_start > nrec2 + 1 ||
+		    h->new_count > nrec2 + 1 - h->new_start)
+			return -1;
+
+		/* Ordering: no overlap with previous hunk */
+		if (h->old_start < prev_old_end ||
+		    h->new_start < prev_new_end)
+			return -1;
+
+		/* Ranges are in bounds: mark them (changed[] is 0-based) */
+		for (j = 0; j < h->old_count; j++)
+			xe->xdf1.changed[h->old_start - 1 + j] = true;
+		for (j = 0; j < h->new_count; j++)
+			xe->xdf2.changed[h->new_start - 1 + j] = true;
+
+		prev_old_end = h->old_start + h->old_count;
+		prev_new_end = h->new_start + h->new_count;
+		total_old += h->old_count;
+		total_new += h->new_count;
+	}
+
+	/*
+	 * Synchronization invariant: unchanged line counts must match.
+	 * Otherwise xdl_build_script() would walk off one array.
+	 */
+	if (nrec1 - total_old != nrec2 - total_new)
+		return -1;
+
+	return 0;
+}
+
 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
 	xdchange_t *xscr;
 	xdfenv_t xe;
 	emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
 
-	if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
+	if (xpp->external_hunks) {
+		if (xdl_prepare_env(mf1, mf2, xpp, &xe) < 0)
+			return -1;
+		if (xdl_populate_hunks_from_external(&xe,
+						     xpp->external_hunks,
+						     xpp->external_hunks_nr) == 0)
+			goto diff_done;
+		xdl_free_env(&xe);
+	}
 
+	if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0)
 		return -1;
-	}
+
+diff_done:
+
 	if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
 	    xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
 	    xdl_build_script(&xe, &xscr) < 0) {
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index cd4fc405eb..4645a9a746 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -432,3 +432,13 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 
 	return 0;
 }
+
+/*
+ * Zero every entry of changed[] so that no line counts as changed.
+ * The guard slots at changed[-1] and changed[nrec] are wiped too,
+ * since xdl_change_compact() relies on them during backward scans.
+ */
+void xdl_clear_changed(xdfile_t *xdf)
+{
+	memset(&xdf->changed[-1], 0, sizeof(bool) * (xdf->nrec + 2));
+}
diff --git a/xdiff/xprepare.h b/xdiff/xprepare.h
index 947d9fc1bb..0413baf07b 100644
--- a/xdiff/xprepare.h
+++ b/xdiff/xprepare.h
@@ -28,6 +28,7 @@
 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 		    xdfenv_t *xe);
 void xdl_free_env(xdfenv_t *xe);
+void xdl_clear_changed(xdfile_t *xdf);
 
 
 
-- 
gitgitgadget


  reply	other threads:[~2026-05-25 18:30 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-22  2:11 [PATCH 0/5] [RFC] diff: add diff.<driver>.process for external hunk providers Michael Montalbo via GitGitGadget
2026-05-22  2:11 ` [PATCH 1/5] xdiff: support external hunks via xpparam_t Michael Montalbo via GitGitGadget
2026-05-22  5:29   ` Junio C Hamano
2026-05-22 19:06     ` Michael Montalbo
2026-05-24  8:50       ` Junio C Hamano
2026-05-24 18:01         ` Michael Montalbo
2026-05-22  2:11 ` [PATCH 2/5] userdiff: add diff.<driver>.process config Michael Montalbo via GitGitGadget
2026-05-22  2:11 ` [PATCH 3/5] diff: add long-running diff process via diff.<driver>.process Michael Montalbo via GitGitGadget
2026-05-22  2:11 ` [PATCH 4/5] blame: consult diff process for zero-hunk detection Michael Montalbo via GitGitGadget
2026-05-22  2:11 ` [PATCH 5/5] diff-process-normalize: add built-in whitespace normalizer Michael Montalbo via GitGitGadget
2026-05-22  5:29 ` [PATCH 0/5] [RFC] diff: add diff.<driver>.process for external hunk providers Junio C Hamano
2026-05-22 17:19   ` Michael Montalbo
2026-05-25 18:29 ` [PATCH v2 0/4] " Michael Montalbo via GitGitGadget
2026-05-25 18:29   ` Michael Montalbo via GitGitGadget [this message]
2026-05-25 18:29   ` [PATCH v2 2/4] userdiff: add diff.<driver>.process config Michael Montalbo via GitGitGadget
2026-05-25 18:29   ` [PATCH v2 3/4] diff: add long-running diff process via diff.<driver>.process Michael Montalbo via GitGitGadget
2026-05-25 18:29   ` [PATCH v2 4/4] blame: consult diff process for zero-hunk detection Michael Montalbo via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f887a7e2ba697b6a2aa77319ed92cf4009f46871.1779733799.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=mmontalbo@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox