From: "Michael Montalbo via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Michael Montalbo <mmontalbo@gmail.com>,
Michael Montalbo <mmontalbo@gmail.com>
Subject: [PATCH v2 1/4] xdiff: support external hunks via xpparam_t
Date: Mon, 25 May 2026 18:29:55 +0000 [thread overview]
Message-ID: <f887a7e2ba697b6a2aa77319ed92cf4009f46871.1779733799.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2120.v2.git.1779733799.gitgitgadget@gmail.com>
From: Michael Montalbo <mmontalbo@gmail.com>
Add two new xpparam_t fields (external_hunks, external_hunks_nr)
that let callers supply pre-computed hunks. When set, xdl_diff()
populates the changed[] arrays from these hunks instead of running
the diff algorithm, then continues through compaction and emission
as usual.
Validate supplied hunks before use: reject out-of-bounds line
numbers, overlapping or out-of-order hunks, negative counts, and
violations of the synchronization invariant (unchanged line counts
must match between files). On validation failure, fall back to
the builtin diff algorithm.
Skip trim_common_tail() in xdi_diff() when external hunks are
present, since external hunks reference line numbers in the
original content.
Signed-off-by: Michael Montalbo <mmontalbo@gmail.com>
---
xdiff-interface.c | 7 +++-
xdiff/xdiff.h | 13 ++++++++
xdiff/xdiffi.c | 84 +++++++++++++++++++++++++++++++++++++++++++++--
xdiff/xprepare.c | 10 ++++++
xdiff/xprepare.h | 1 +
5 files changed, 112 insertions(+), 3 deletions(-)
diff --git a/xdiff-interface.c b/xdiff-interface.c
index f043330f2a..9542c0bcc2 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -124,7 +124,12 @@ int xdi_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, xdemitconf_t co
if (mf1->size > MAX_XDIFF_SIZE || mf2->size > MAX_XDIFF_SIZE)
return -1;
- if (!xecfg->ctxlen && !(xecfg->flags & XDL_EMIT_FUNCCONTEXT))
+ /*
+ * External hunks reference line numbers in the original content;
+ * trimming the tail would change line counts and invalidate them.
+ */
+ if (!xpp->external_hunks &&
+ !xecfg->ctxlen && !(xecfg->flags & XDL_EMIT_FUNCCONTEXT))
trim_common_tail(&a, &b);
return xdl_diff(&a, &b, xpp, xecfg, xecb);
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index dc370712e9..2ee6f1aae3 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -78,6 +78,15 @@ typedef struct s_mmbuffer {
long size;
} mmbuffer_t;
+/*
+ * Hunk descriptor for externally computed diffs.
+ *
+ * old_start/old_count describe a run of lines in the first file and
+ * new_start/new_count the matching run in the second file.
+ * Line numbers are 1-based, matching unified diff convention.
+ * A count of zero denotes an empty range; the validation in
+ * xdl_populate_hunks_from_external() accepts such a range only when
+ * its start lies in [1, number of lines + 1].
+ *
+ * NOTE(review): unified diff headers usually give an empty range the
+ * number of the preceding line (possibly 0), which that validation
+ * would reject -- confirm that callers translate such ranges.
+ */
+struct xdl_hunk {
+ long old_start, old_count;
+ long new_start, new_count;
+};
+
typedef struct s_xpparam {
unsigned long flags;
@@ -88,6 +97,10 @@ typedef struct s_xpparam {
/* See Documentation/diff-options.adoc. */
char **anchors;
size_t anchors_nr;
+
+ /* Externally computed hunks: bypass the diff algorithm. */
+ const struct xdl_hunk *external_hunks;
+ size_t external_hunks_nr;
} xpparam_t;
typedef struct s_xdemitcb {
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 5455b4690d..e7d6190d37 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -1085,16 +1085,96 @@ static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
}
}
+/*
+ * Populate the changed[] arrays of both files in xe from externally
+ * supplied hunks, bypassing the diff algorithm.
+ *
+ * hunks[0..nr_hunks) use 1-based start lines.  Every hunk must have
+ * non-negative counts, must lie within its file (an empty range may
+ * start anywhere in [1, nrec + 1]), and must not start before the end
+ * of the previous hunk on either side.  In addition, the number of
+ * unchanged lines implied by the hunks must be equal in both files.
+ *
+ * Returns 0 on success, -1 on validation failure.  On failure the
+ * changed[] arrays may be left partially populated, so the caller
+ * must not keep using xe as a diff result.
+ */
+static int xdl_populate_hunks_from_external(xdfenv_t *xe,
+					     const struct xdl_hunk *hunks,
+					     size_t nr_hunks)
+{
+	const long nrec1 = (long)xe->xdf1.nrec;
+	const long nrec2 = (long)xe->xdf2.nrec;
+	size_t i;
+	long j, prev_old_end = 0, prev_new_end = 0;
+	long total_old = 0, total_new = 0;
+
+	xdl_clear_changed(&xe->xdf1);
+	xdl_clear_changed(&xe->xdf2);
+
+	for (i = 0; i < nr_hunks; i++) {
+		const struct xdl_hunk *h = &hunks[i];
+
+		if (h->old_count < 0 || h->new_count < 0)
+			return -1;
+
+		/*
+		 * Bounds check (1-based line numbers).  The count is
+		 * compared against the room left after start instead of
+		 * computing start + count, which could overflow a long
+		 * for arbitrary caller-supplied values.  A start of
+		 * nrec + 1 leaves room for an empty range only.
+		 */
+		if (h->old_start < 1 || h->old_start > nrec1 + 1 ||
+		    h->old_count > nrec1 - h->old_start + 1)
+			return -1;
+		if (h->new_start < 1 || h->new_start > nrec2 + 1 ||
+		    h->new_count > nrec2 - h->new_start + 1)
+			return -1;
+
+		/* Ordering: no overlap with previous hunk */
+		if (h->old_start < prev_old_end ||
+		    h->new_start < prev_new_end)
+			return -1;
+
+		for (j = 0; j < h->old_count; j++)
+			xe->xdf1.changed[h->old_start - 1 + j] = true;
+		for (j = 0; j < h->new_count; j++)
+			xe->xdf2.changed[h->new_start - 1 + j] = true;
+
+		/* One past the last covered line; at most nrec + 1 here. */
+		prev_old_end = h->old_start + h->old_count;
+		prev_new_end = h->new_start + h->new_count;
+		total_old += h->old_count;
+		total_new += h->new_count;
+	}
+
+	/*
+	 * Synchronization invariant: unchanged line counts must match.
+	 * Otherwise xdl_build_script() would walk off one array.
+	 */
+	if (nrec1 - total_old != nrec2 - total_new)
+		return -1;
+
+	return 0;
+}
+
int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
xdchange_t *xscr;
xdfenv_t xe;
emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
- if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
+ if (xpp->external_hunks) {
+ if (xdl_prepare_env(mf1, mf2, xpp, &xe) < 0)
+ return -1;
+ if (xdl_populate_hunks_from_external(&xe,
+ xpp->external_hunks,
+ xpp->external_hunks_nr) == 0)
+ goto diff_done;
+ xdl_free_env(&xe);
+ }
+ if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0)
return -1;
- }
+
+diff_done:
+
if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
xdl_build_script(&xe, &xscr) < 0) {
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index cd4fc405eb..4645a9a746 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -432,3 +432,13 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
return 0;
}
+
+/*
+ * Mark every line of xdf as unchanged by zeroing its changed[] array.
+ * The cleared region begins one element before changed[0] and spans
+ * nrec + 2 elements, so it also covers the sentinel slots changed[-1]
+ * and changed[nrec] that xdl_change_compact() relies on during
+ * backward scans.
+ */
+void xdl_clear_changed(xdfile_t *xdf)
+{
+	size_t nr_slots = xdf->nrec + 2;
+
+	memset(xdf->changed - 1, 0, nr_slots * sizeof(bool));
+}
diff --git a/xdiff/xprepare.h b/xdiff/xprepare.h
index 947d9fc1bb..0413baf07b 100644
--- a/xdiff/xprepare.h
+++ b/xdiff/xprepare.h
@@ -28,6 +28,7 @@
int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdfenv_t *xe);
void xdl_free_env(xdfenv_t *xe);
+void xdl_clear_changed(xdfile_t *xdf);
--
gitgitgadget
next prev parent reply other threads:[~2026-05-25 18:30 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-22 2:11 [PATCH 0/5] [RFC] diff: add diff.<driver>.process for external hunk providers Michael Montalbo via GitGitGadget
2026-05-22 2:11 ` [PATCH 1/5] xdiff: support external hunks via xpparam_t Michael Montalbo via GitGitGadget
2026-05-22 5:29 ` Junio C Hamano
2026-05-22 19:06 ` Michael Montalbo
2026-05-24 8:50 ` Junio C Hamano
2026-05-24 18:01 ` Michael Montalbo
2026-05-22 2:11 ` [PATCH 2/5] userdiff: add diff.<driver>.process config Michael Montalbo via GitGitGadget
2026-05-22 2:11 ` [PATCH 3/5] diff: add long-running diff process via diff.<driver>.process Michael Montalbo via GitGitGadget
2026-05-22 2:11 ` [PATCH 4/5] blame: consult diff process for zero-hunk detection Michael Montalbo via GitGitGadget
2026-05-22 2:11 ` [PATCH 5/5] diff-process-normalize: add built-in whitespace normalizer Michael Montalbo via GitGitGadget
2026-05-22 5:29 ` [PATCH 0/5] [RFC] diff: add diff.<driver>.process for external hunk providers Junio C Hamano
2026-05-22 17:19 ` Michael Montalbo
2026-05-25 18:29 ` [PATCH v2 0/4] " Michael Montalbo via GitGitGadget
2026-05-25 18:29 ` Michael Montalbo via GitGitGadget [this message]
2026-05-25 18:29 ` [PATCH v2 2/4] userdiff: add diff.<driver>.process config Michael Montalbo via GitGitGadget
2026-05-25 18:29 ` [PATCH v2 3/4] diff: add long-running diff process via diff.<driver>.process Michael Montalbo via GitGitGadget
2026-05-25 18:29 ` [PATCH v2 4/4] blame: consult diff process for zero-hunk detection Michael Montalbo via GitGitGadget
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f887a7e2ba697b6a2aa77319ed92cf4009f46871.1779733799.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=mmontalbo@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox