Git development
 help / color / mirror / Atom feed
* [PATCH] diff: simplify line-range filter by classifying removals immediately
@ 2026-04-26 19:10 Michael Montalbo via GitGitGadget
  0 siblings, 0 replies; only message in thread
From: Michael Montalbo via GitGitGadget @ 2026-04-26 19:10 UTC (permalink / raw)
  To: git; +Cc: Michael Montalbo, Michael Montalbo

From: Michael Montalbo <mmontalbo@gmail.com>

The line-range filter buffered '-' (removal) lines in a pending_rm
strbuf, deferring their classification until a '+' or ' ' line
arrived to reveal the post-image position.

This buffering is unnecessary.  Removal lines are pre-image content
that occupies no post-image space, so they do not advance lno_post.
Within a hunk, xdiff emits removals before additions for each
change, so a '-' line always arrives while lno_post is at the same
position that the following '+' or ' ' line will occupy.  Each
line can therefore be classified against the tracked ranges as it
arrives, without waiting for a non-removal line to confirm the
position.

The buffering also had a bug: flush_rhunk() unconditionally drained
the pending buffer when the range hunk was active, even if lno_post
had advanced past the tracked range.  This caused deletions
immediately after the tracked function to be incorrectly included
in patch output.

Remove the pending_rm buffer and classify '-' lines using the same
in_range check applied to '+' and ' ' lines.  This simplifies the
filter, removes three struct fields and a helper function, and
makes the flush_rhunk() bug impossible by construction.

Signed-off-by: Michael Montalbo <mmontalbo@gmail.com>
---
    diff: simplify line-range filter by classifying removals immediately
    
    The line-range filter buffered '-' (removal) lines in a
    pending_rm strbuf, deferring their classification until a '+'
    or ' ' line arrived to reveal the post-image position.
    
    This buffering is unnecessary. Removal lines are pre-image
    content that occupies no post-image space, so they do not
    advance lno_post. Within a hunk, xdiff emits removals before additions
    for each change, so a '-' line always arrives while
    lno_post is at the same position that the following '+' or ' ' line will
    occupy. Each line can therefore be classified against
    the tracked ranges as it arrives, without waiting for a
    non-removal line to confirm the position.
    
    The buffering also had a bug: flush_rhunk() unconditionally
    drained the pending buffer when the range hunk was active, even
    if lno_post had advanced past the tracked range. This caused deletions
    immediately after the tracked function to be
    incorrectly included in patch output.
    
    Remove the pending_rm buffer and classify '-' lines using the
    same in_range check applied to '+' and ' ' lines. This simplifies the
    filter, removes three struct fields and a helper function, and makes the
    flush_rhunk() bug impossible by
    construction.

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2099%2Fmmontalbo%2Fmm%2Fline-log-immediate-classify-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2099/mmontalbo/mm/line-log-immediate-classify-v1
Pull-Request: https://github.com/gitgitgadget/git/pull/2099

 diff.c              | 110 ++++++++++++---------------------------
 t/t4211-line-log.sh | 124 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 157 insertions(+), 77 deletions(-)

diff --git a/diff.c b/diff.c
index 397e38b41c..5662d080be 100644
--- a/diff.c
+++ b/diff.c
@@ -616,11 +616,6 @@ struct emit_callback {
  * the requested ranges.  Contiguous in-range lines are collected into
  * range hunks and flushed with a synthetic @@ header so that
  * fn_out_consume() sees well-formed unified-diff fragments.
- *
- * Removal lines ('-') cannot be classified by post-image position, so
- * they are buffered in pending_rm until the next '+' or ' ' line
- * reveals whether they precede an in-range line (flush into range hunk) or
- * an out-of-range line (discard).
  */
 struct line_range_callback {
 	xdiff_emit_line_fn orig_line_fn;
@@ -646,11 +641,6 @@ struct line_range_callback {
 	int rhunk_active;
 	int rhunk_has_changes;		/* any '+' or '-' lines? */
 
-	/* Removal lines not yet known to be in-range */
-	struct strbuf pending_rm;
-	int pending_rm_count;
-	long pending_rm_pre_begin;	/* pre-image line of first pending */
-
 	int ret;			/* latched error from orig_line_fn */
 };
 
@@ -2539,12 +2529,6 @@ static int quick_consume(void *priv, char *line UNUSED, unsigned long len UNUSED
 	return 1;
 }
 
-static void discard_pending_rm(struct line_range_callback *s)
-{
-	strbuf_reset(&s->pending_rm);
-	s->pending_rm_count = 0;
-}
-
 static void flush_rhunk(struct line_range_callback *s)
 {
 	struct strbuf hdr = STRBUF_INIT;
@@ -2553,14 +2537,6 @@ static void flush_rhunk(struct line_range_callback *s)
 	if (!s->rhunk_active || s->ret)
 		return;
 
-	/* Drain any pending removal lines into the range hunk */
-	if (s->pending_rm_count) {
-		strbuf_addbuf(&s->rhunk, &s->pending_rm);
-		s->rhunk_old_count += s->pending_rm_count;
-		s->rhunk_has_changes = 1;
-		discard_pending_rm(s);
-	}
-
 	/*
 	 * Suppress context-only hunks: they contain no actual changes
 	 * and would just be noise.  This can happen when the inflated
@@ -2615,11 +2591,6 @@ static void line_range_hunk_fn(void *data,
 	 * When count > 0, begin is 1-based.  When count == 0, begin is
 	 * adjusted down by 1 by xdl_emit_hunk_hdr(), but no lines of
 	 * that type will arrive, so the value is unused.
-	 *
-	 * Any pending removal lines from the previous xdiff hunk are
-	 * intentionally left in pending_rm: the line callback will
-	 * flush or discard them when the next content line reveals
-	 * whether the removals precede in-range content.
 	 */
 	s->lno_post = new_begin;
 	s->lno_pre = old_begin;
@@ -2635,88 +2606,75 @@ static void line_range_hunk_fn(void *data,
 static int line_range_line_fn(void *priv, char *line, unsigned long len)
 {
 	struct line_range_callback *s = priv;
-	const struct range *cur;
-	long lno_0, cur_pre;
+	long lno_0;
+	int in_range;
 
 	if (s->ret)
 		return s->ret;
 
-	if (line[0] == '-') {
-		if (!s->pending_rm_count)
-			s->pending_rm_pre_begin = s->lno_pre;
-		s->lno_pre++;
-		strbuf_add(&s->pending_rm, line, len);
-		s->pending_rm_count++;
-		return s->ret;
-	}
-
 	if (line[0] == '\\') {
-		if (s->pending_rm_count)
-			strbuf_add(&s->pending_rm, line, len);
-		else if (s->rhunk_active)
+		if (s->rhunk_active)
 			strbuf_add(&s->rhunk, line, len);
-		/* otherwise outside tracked range; drop silently */
 		return s->ret;
 	}
 
-	if (line[0] != '+' && line[0] != ' ')
+	if (line[0] != '+' && line[0] != ' ' && line[0] != '-')
 		BUG("unexpected diff line type '%c'", line[0]);
 
+	/*
+	 * Compute post-image position.  '+' and ' ' lines advance
+	 * lno_post; '-' lines do not (they occupy no post-image space).
+	 */
 	lno_0 = s->lno_post - 1;
-	cur_pre = s->lno_pre;	/* save before advancing for context lines */
-	s->lno_post++;
-	if (line[0] == ' ')
-		s->lno_pre++;
+	if (line[0] != '-')
+		s->lno_post++;
 
-	/* Advance past ranges we've passed */
+	/*
+	 * Advance past any ranges we've moved beyond.  Emit the
+	 * accumulated range hunk for the range we're leaving.
+	 */
 	while (s->cur_range < s->ranges->nr &&
 	       lno_0 >= s->ranges->ranges[s->cur_range].end) {
 		if (s->rhunk_active)
 			flush_rhunk(s);
-		discard_pending_rm(s);
 		s->cur_range++;
 	}
 
-	/* Past all ranges */
-	if (s->cur_range >= s->ranges->nr) {
-		discard_pending_rm(s);
-		return s->ret;
-	}
-
-	cur = &s->ranges->ranges[s->cur_range];
+	in_range = s->cur_range < s->ranges->nr &&
+		   lno_0 >= s->ranges->ranges[s->cur_range].start &&
+		   lno_0 < s->ranges->ranges[s->cur_range].end;
 
-	/* Before current range */
-	if (lno_0 < cur->start) {
-		discard_pending_rm(s);
+	if (!in_range) {
+		if (line[0] != '+')
+			s->lno_pre++;
 		return s->ret;
 	}
 
-	/* In range so start a new range hunk if needed */
+	/* Start a new range hunk if this is the first in-range line */
 	if (!s->rhunk_active) {
 		s->rhunk_active = 1;
 		s->rhunk_has_changes = 0;
 		s->rhunk_new_begin = lno_0 + 1;
-		s->rhunk_old_begin = s->pending_rm_count
-			? s->pending_rm_pre_begin : cur_pre;
+		s->rhunk_old_begin = s->lno_pre;
 		s->rhunk_old_count = 0;
 		s->rhunk_new_count = 0;
 		strbuf_reset(&s->rhunk);
 	}
 
-	/* Flush pending removals into range hunk */
-	if (s->pending_rm_count) {
-		strbuf_addbuf(&s->rhunk, &s->pending_rm);
-		s->rhunk_old_count += s->pending_rm_count;
-		s->rhunk_has_changes = 1;
-		discard_pending_rm(s);
-	}
-
+	/* Append line to the range hunk */
 	strbuf_add(&s->rhunk, line, len);
-	s->rhunk_new_count++;
-	if (line[0] == '+')
+	if (line[0] == '-') {
+		s->rhunk_old_count++;
 		s->rhunk_has_changes = 1;
-	else
+		s->lno_pre++;
+	} else if (line[0] == '+') {
+		s->rhunk_new_count++;
+		s->rhunk_has_changes = 1;
+	} else {
 		s->rhunk_old_count++;
+		s->rhunk_new_count++;
+		s->lno_pre++;
+	}
 
 	return s->ret;
 }
@@ -4072,7 +4030,6 @@ static void builtin_diff(const char *name_a,
 			lr_state.orig_cb_data = &ecbdata;
 			lr_state.ranges = line_ranges;
 			strbuf_init(&lr_state.rhunk, 0);
-			strbuf_init(&lr_state.pending_rm, 0);
 
 			/*
 			 * Inflate ctxlen so that all changes within
@@ -4107,7 +4064,6 @@ static void builtin_diff(const char *name_a,
 				die("unable to generate diff for %s",
 				    one->path);
 			strbuf_release(&lr_state.rhunk);
-			strbuf_release(&lr_state.pending_rm);
 		} else if (xdi_diff_outf(&mf1, &mf2, NULL, fn_out_consume,
 					 &ecbdata, &xpp, &xecfg))
 			die("unable to generate diff for %s", one->path);
diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh
index aaf197d2ed..2bff0e4c26 100755
--- a/t/t4211-line-log.sh
+++ b/t/t4211-line-log.sh
@@ -711,4 +711,128 @@ test_expect_success '-L with -G filters to diff-text matches' '
 	grep "F2 + 2" actual
 '
 
+test_expect_success 'setup for trailing deletion test' '
+	git checkout --orphan trailing-del &&
+	git reset --hard &&
+	cat >file.c <<-\EOF &&
+	void tracked()
+	{
+	    return 1;
+	}
+	// trailing comment
+	EOF
+	git add file.c &&
+	test_tick &&
+	git commit -m "add file with trailing comment" &&
+	# Modify tracked() AND delete the trailing comment in
+	# one commit, so the commit touches the tracked range
+	# and is not filtered out by the revision walker.
+	cat >file.c <<-\EOF &&
+	void tracked()
+	{
+	    return 2;
+	}
+	EOF
+	git commit -a -m "modify tracked and delete trailing comment"
+'
+
+test_expect_success '-L does not include deletions past end of tracked range' '
+	git log -L:tracked:file.c --format= -1 -p >actual &&
+	# The trailing comment deletion is outside the tracked
+	# range and should not appear in the patch output.
+	grep "return 2" actual &&
+	! grep "trailing comment" actual
+'
+
+test_expect_success '-L includes leading deletions resolved by in-range line' '
+	git checkout --orphan leading-del &&
+	git reset --hard &&
+	cat >file.c <<-\EOF &&
+	// leading comment
+	void tracked()
+	{
+	    return 1;
+	}
+	EOF
+	git add file.c &&
+	test_tick &&
+	git commit -m "add file with leading comment" &&
+	cat >file.c <<-\EOF &&
+	void tracked()
+	{
+	    return 2;
+	}
+	EOF
+	git commit -a -m "modify tracked and delete leading comment" &&
+	git log -L:tracked:file.c --format= -1 -p >actual &&
+	# The leading comment deletion is resolved by the next
+	# non-removal line (void tracked), which is in range.
+	# Pending removals are attributed to the range they precede.
+	grep "return 2" actual &&
+	grep "leading comment" actual
+'
+
+test_expect_success 'setup for line-range filter edge cases' '
+	git checkout --orphan filter-edge &&
+	git reset --hard &&
+	cat >file.c <<-\EOF &&
+	void before()
+	{
+	    return 0;
+	}
+
+	void tracked()
+	{
+	    int a = 1;
+	    int b = 2;
+	    int c = 3;
+	    return a + b + c;
+	}
+
+	void after()
+	{
+	    return 9;
+	}
+	EOF
+	git add file.c &&
+	test_tick &&
+	git commit -m "initial"
+'
+
+test_expect_success '-L change at exact first line of range' '
+	git checkout filter-edge &&
+	# Change the function signature (first line of range)
+	sed "s/void tracked/int tracked/" file.c >tmp &&
+	mv tmp file.c &&
+	git commit -a -m "change first line" &&
+	git log -L:tracked:file.c -p --format=%s -1 >actual &&
+	grep "change first line" actual &&
+	grep "+int tracked" actual &&
+	grep "\\-void tracked" actual
+'
+
+test_expect_success '-L change at exact last line of range' '
+	git checkout filter-edge &&
+	git reset --hard HEAD~1 &&
+	# Change the closing brace line (last line of range)
+	sed "s/^}$/} \/\/ end tracked/" file.c >tmp &&
+	mv tmp file.c &&
+	git commit -a -m "change last line" &&
+	git log -L:tracked:file.c -p --format=%s -1 >actual &&
+	grep "change last line" actual &&
+	grep "end tracked" actual
+'
+
+test_expect_success '-L pure deletion in range (no additions)' '
+	git checkout filter-edge &&
+	git reset --hard HEAD~1 &&
+	# Delete a line inside tracked() without adding anything
+	sed "/int c/d" file.c >tmp &&
+	mv tmp file.c &&
+	git commit -a -m "pure deletion" &&
+	git log -L:tracked:file.c -p --format=%s -1 >actual &&
+	grep "pure deletion" actual &&
+	grep "\\-.*int c" actual
+'
+
 test_done

base-commit: 9f223ef1c026d91c7ac68cc0211bde255dda6199
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2026-04-26 19:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-26 19:10 [PATCH] diff: simplify line-range filter by classifying removals immediately Michael Montalbo via GitGitGadget

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox