* [PATCH] diff: simplify line-range filter by classifying removals immediately
@ 2026-04-26 19:10 Michael Montalbo via GitGitGadget
0 siblings, 0 replies; only message in thread
From: Michael Montalbo via GitGitGadget @ 2026-04-26 19:10 UTC (permalink / raw)
To: git; +Cc: Michael Montalbo, Michael Montalbo
From: Michael Montalbo <mmontalbo@gmail.com>
The line-range filter buffered '-' (removal) lines in a pending_rm
strbuf, deferring their classification until a '+' or ' ' line
arrived to reveal the post-image position.
This buffering is unnecessary. Removal lines are pre-image content
that occupies no post-image space, so they do not advance lno_post.
Within a hunk, xdiff emits removals before additions for each
change, so a '-' line always arrives while lno_post is at the same
position that the following '+' or ' ' line will occupy. Each
line can therefore be classified against the tracked ranges as it
arrives, without waiting for a non-removal line to confirm the
position.
The buffering also had a bug: flush_rhunk() unconditionally drained
the pending buffer when the range hunk was active, even if lno_post
had advanced past the tracked range. This caused deletions
immediately after the tracked function to be incorrectly included
in patch output.
Remove the pending_rm buffer and classify '-' lines using the same
in_range check applied to '+' and ' ' lines. This simplifies the
filter, removes three struct fields and a helper function, and
makes the flush_rhunk() bug impossible by construction.
Signed-off-by: Michael Montalbo <mmontalbo@gmail.com>
---
diff: simplify line-range filter by classifying removals immediately
The line-range filter buffered '-' (removal) lines in a
pending_rm strbuf, deferring their classification until a '+'
or ' ' line arrived to reveal the post-image position.
This buffering is unnecessary. Removal lines are pre-image
content that occupies no post-image space, so they do not
advance lno_post. Within a hunk, xdiff emits removals before additions
for each change, so a '-' line always arrives while
lno_post is at the same position that the following '+' or ' ' line will
occupy. Each line can therefore be classified against
the tracked ranges as it arrives, without waiting for a
non-removal line to confirm the position.
The buffering also had a bug: flush_rhunk() unconditionally
drained the pending buffer when the range hunk was active, even
if lno_post had advanced past the tracked range. This caused deletions
immediately after the tracked function to be
incorrectly included in patch output.
Remove the pending_rm buffer and classify '-' lines using the
same in_range check applied to '+' and ' ' lines. This simplifies the
filter, removes three struct fields and a helper function, and makes the
flush_rhunk() bug impossible by
construction.
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2099%2Fmmontalbo%2Fmm%2Fline-log-immediate-classify-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2099/mmontalbo/mm/line-log-immediate-classify-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/2099
diff.c | 110 ++++++++++++---------------------------
t/t4211-line-log.sh | 124 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 157 insertions(+), 77 deletions(-)
diff --git a/diff.c b/diff.c
index 397e38b41c..5662d080be 100644
--- a/diff.c
+++ b/diff.c
@@ -616,11 +616,6 @@ struct emit_callback {
* the requested ranges. Contiguous in-range lines are collected into
* range hunks and flushed with a synthetic @@ header so that
* fn_out_consume() sees well-formed unified-diff fragments.
- *
- * Removal lines ('-') cannot be classified by post-image position, so
- * they are buffered in pending_rm until the next '+' or ' ' line
- * reveals whether they precede an in-range line (flush into range hunk) or
- * an out-of-range line (discard).
*/
struct line_range_callback {
xdiff_emit_line_fn orig_line_fn;
@@ -646,11 +641,6 @@ struct line_range_callback {
int rhunk_active;
int rhunk_has_changes; /* any '+' or '-' lines? */
- /* Removal lines not yet known to be in-range */
- struct strbuf pending_rm;
- int pending_rm_count;
- long pending_rm_pre_begin; /* pre-image line of first pending */
-
int ret; /* latched error from orig_line_fn */
};
@@ -2539,12 +2529,6 @@ static int quick_consume(void *priv, char *line UNUSED, unsigned long len UNUSED
return 1;
}
-static void discard_pending_rm(struct line_range_callback *s)
-{
- strbuf_reset(&s->pending_rm);
- s->pending_rm_count = 0;
-}
-
static void flush_rhunk(struct line_range_callback *s)
{
struct strbuf hdr = STRBUF_INIT;
@@ -2553,14 +2537,6 @@ static void flush_rhunk(struct line_range_callback *s)
if (!s->rhunk_active || s->ret)
return;
- /* Drain any pending removal lines into the range hunk */
- if (s->pending_rm_count) {
- strbuf_addbuf(&s->rhunk, &s->pending_rm);
- s->rhunk_old_count += s->pending_rm_count;
- s->rhunk_has_changes = 1;
- discard_pending_rm(s);
- }
-
/*
* Suppress context-only hunks: they contain no actual changes
* and would just be noise. This can happen when the inflated
@@ -2615,11 +2591,6 @@ static void line_range_hunk_fn(void *data,
* When count > 0, begin is 1-based. When count == 0, begin is
* adjusted down by 1 by xdl_emit_hunk_hdr(), but no lines of
* that type will arrive, so the value is unused.
- *
- * Any pending removal lines from the previous xdiff hunk are
- * intentionally left in pending_rm: the line callback will
- * flush or discard them when the next content line reveals
- * whether the removals precede in-range content.
*/
s->lno_post = new_begin;
s->lno_pre = old_begin;
@@ -2635,88 +2606,75 @@ static void line_range_hunk_fn(void *data,
static int line_range_line_fn(void *priv, char *line, unsigned long len)
{
struct line_range_callback *s = priv;
- const struct range *cur;
- long lno_0, cur_pre;
+ long lno_0;
+ int in_range;
if (s->ret)
return s->ret;
- if (line[0] == '-') {
- if (!s->pending_rm_count)
- s->pending_rm_pre_begin = s->lno_pre;
- s->lno_pre++;
- strbuf_add(&s->pending_rm, line, len);
- s->pending_rm_count++;
- return s->ret;
- }
-
if (line[0] == '\\') {
- if (s->pending_rm_count)
- strbuf_add(&s->pending_rm, line, len);
- else if (s->rhunk_active)
+ if (s->rhunk_active)
strbuf_add(&s->rhunk, line, len);
- /* otherwise outside tracked range; drop silently */
return s->ret;
}
- if (line[0] != '+' && line[0] != ' ')
+ if (line[0] != '+' && line[0] != ' ' && line[0] != '-')
BUG("unexpected diff line type '%c'", line[0]);
+ /*
+ * Compute post-image position. '+' and ' ' lines advance
+ * lno_post; '-' lines do not (they occupy no post-image space).
+ */
lno_0 = s->lno_post - 1;
- cur_pre = s->lno_pre; /* save before advancing for context lines */
- s->lno_post++;
- if (line[0] == ' ')
- s->lno_pre++;
+ if (line[0] != '-')
+ s->lno_post++;
- /* Advance past ranges we've passed */
+ /*
+ * Advance past any ranges we've moved beyond. Emit the
+ * accumulated range hunk for the range we're leaving.
+ */
while (s->cur_range < s->ranges->nr &&
lno_0 >= s->ranges->ranges[s->cur_range].end) {
if (s->rhunk_active)
flush_rhunk(s);
- discard_pending_rm(s);
s->cur_range++;
}
- /* Past all ranges */
- if (s->cur_range >= s->ranges->nr) {
- discard_pending_rm(s);
- return s->ret;
- }
-
- cur = &s->ranges->ranges[s->cur_range];
+ in_range = s->cur_range < s->ranges->nr &&
+ lno_0 >= s->ranges->ranges[s->cur_range].start &&
+ lno_0 < s->ranges->ranges[s->cur_range].end;
- /* Before current range */
- if (lno_0 < cur->start) {
- discard_pending_rm(s);
+ if (!in_range) {
+ if (line[0] != '+')
+ s->lno_pre++;
return s->ret;
}
- /* In range so start a new range hunk if needed */
+ /* Start a new range hunk if this is the first in-range line */
if (!s->rhunk_active) {
s->rhunk_active = 1;
s->rhunk_has_changes = 0;
s->rhunk_new_begin = lno_0 + 1;
- s->rhunk_old_begin = s->pending_rm_count
- ? s->pending_rm_pre_begin : cur_pre;
+ s->rhunk_old_begin = s->lno_pre;
s->rhunk_old_count = 0;
s->rhunk_new_count = 0;
strbuf_reset(&s->rhunk);
}
- /* Flush pending removals into range hunk */
- if (s->pending_rm_count) {
- strbuf_addbuf(&s->rhunk, &s->pending_rm);
- s->rhunk_old_count += s->pending_rm_count;
- s->rhunk_has_changes = 1;
- discard_pending_rm(s);
- }
-
+ /* Append line to the range hunk */
strbuf_add(&s->rhunk, line, len);
- s->rhunk_new_count++;
- if (line[0] == '+')
+ if (line[0] == '-') {
+ s->rhunk_old_count++;
s->rhunk_has_changes = 1;
- else
+ s->lno_pre++;
+ } else if (line[0] == '+') {
+ s->rhunk_new_count++;
+ s->rhunk_has_changes = 1;
+ } else {
s->rhunk_old_count++;
+ s->rhunk_new_count++;
+ s->lno_pre++;
+ }
return s->ret;
}
@@ -4072,7 +4030,6 @@ static void builtin_diff(const char *name_a,
lr_state.orig_cb_data = &ecbdata;
lr_state.ranges = line_ranges;
strbuf_init(&lr_state.rhunk, 0);
- strbuf_init(&lr_state.pending_rm, 0);
/*
* Inflate ctxlen so that all changes within
@@ -4107,7 +4064,6 @@ static void builtin_diff(const char *name_a,
die("unable to generate diff for %s",
one->path);
strbuf_release(&lr_state.rhunk);
- strbuf_release(&lr_state.pending_rm);
} else if (xdi_diff_outf(&mf1, &mf2, NULL, fn_out_consume,
&ecbdata, &xpp, &xecfg))
die("unable to generate diff for %s", one->path);
diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh
index aaf197d2ed..2bff0e4c26 100755
--- a/t/t4211-line-log.sh
+++ b/t/t4211-line-log.sh
@@ -711,4 +711,128 @@ test_expect_success '-L with -G filters to diff-text matches' '
grep "F2 + 2" actual
'
+test_expect_success 'setup for trailing deletion test' '
+ git checkout --orphan trailing-del &&
+ git reset --hard &&
+ cat >file.c <<-\EOF &&
+ void tracked()
+ {
+ return 1;
+ }
+ // trailing comment
+ EOF
+ git add file.c &&
+ test_tick &&
+ git commit -m "add file with trailing comment" &&
+ # Modify tracked() AND delete the trailing comment in
+ # one commit, so the commit touches the tracked range
+ # and is not filtered out by the revision walker.
+ cat >file.c <<-\EOF &&
+ void tracked()
+ {
+ return 2;
+ }
+ EOF
+ git commit -a -m "modify tracked and delete trailing comment"
+'
+
+test_expect_success '-L does not include deletions past end of tracked range' '
+ git log -L:tracked:file.c --format= -1 -p >actual &&
+ # The trailing comment deletion is outside the tracked
+ # range and should not appear in the patch output.
+ grep "return 2" actual &&
+ ! grep "trailing comment" actual
+'
+
+test_expect_success '-L includes leading deletions resolved by in-range line' '
+ git checkout --orphan leading-del &&
+ git reset --hard &&
+ cat >file.c <<-\EOF &&
+ // leading comment
+ void tracked()
+ {
+ return 1;
+ }
+ EOF
+ git add file.c &&
+ test_tick &&
+ git commit -m "add file with leading comment" &&
+ cat >file.c <<-\EOF &&
+ void tracked()
+ {
+ return 2;
+ }
+ EOF
+ git commit -a -m "modify tracked and delete leading comment" &&
+ git log -L:tracked:file.c --format= -1 -p >actual &&
+ # The leading comment deletion is resolved by the next
+ # non-removal line (void tracked), which is in range.
+ # Pending removals are attributed to the range they precede.
+ grep "return 2" actual &&
+ grep "leading comment" actual
+'
+
+test_expect_success 'setup for line-range filter edge cases' '
+ git checkout --orphan filter-edge &&
+ git reset --hard &&
+ cat >file.c <<-\EOF &&
+ void before()
+ {
+ return 0;
+ }
+
+ void tracked()
+ {
+ int a = 1;
+ int b = 2;
+ int c = 3;
+ return a + b + c;
+ }
+
+ void after()
+ {
+ return 9;
+ }
+ EOF
+ git add file.c &&
+ test_tick &&
+ git commit -m "initial"
+'
+
+test_expect_success '-L change at exact first line of range' '
+ git checkout filter-edge &&
+ # Change the function signature (first line of range)
+ sed "s/void tracked/int tracked/" file.c >tmp &&
+ mv tmp file.c &&
+ git commit -a -m "change first line" &&
+ git log -L:tracked:file.c -p --format=%s -1 >actual &&
+ grep "change first line" actual &&
+ grep "+int tracked" actual &&
+ grep "\\-void tracked" actual
+'
+
+test_expect_success '-L change at exact last line of range' '
+ git checkout filter-edge &&
+ git reset --hard HEAD~1 &&
+ # Change the closing brace line (last line of range)
+ sed "s/^}$/} \/\/ end tracked/" file.c >tmp &&
+ mv tmp file.c &&
+ git commit -a -m "change last line" &&
+ git log -L:tracked:file.c -p --format=%s -1 >actual &&
+ grep "change last line" actual &&
+ grep "end tracked" actual
+'
+
+test_expect_success '-L pure deletion in range (no additions)' '
+ git checkout filter-edge &&
+ git reset --hard HEAD~1 &&
+ # Delete a line inside tracked() without adding anything
+ sed "/int c/d" file.c >tmp &&
+ mv tmp file.c &&
+ git commit -a -m "pure deletion" &&
+ git log -L:tracked:file.c -p --format=%s -1 >actual &&
+ grep "pure deletion" actual &&
+ grep "\\-.*int c" actual
+'
+
test_done
base-commit: 9f223ef1c026d91c7ac68cc0211bde255dda6199
--
gitgitgadget
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2026-04-26 19:10 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-26 19:10 [PATCH] diff: simplify line-range filter by classifying removals immediately Michael Montalbo via GitGitGadget
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox