git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Rast <trast@student.ethz.ch>
To: git@vger.kernel.org
Cc: Junio C Hamano <junio@pobox.com>,
	Johannes Schindelin <johannes.schindelin@gmx.de>,
	Teemu Likonen <tlikonen@iki.fi>
Subject: [PATCH v3 1/4] word diff: comments, preparations for regex customization
Date: Sun, 11 Jan 2009 11:27:11 +0100	[thread overview]
Message-ID: <4aea85caafd38a058145c5769fe8a30ffdbd4d13.1231669012.git.trast@student.ethz.ch> (raw)
In-Reply-To: <cover.1231669012.git.trast@student.ethz.ch>
In-Reply-To: <7vr63atykr.fsf@gitster.siamese.dyndns.org>

This reorganizes the code for diff --color-words in a way that will be
convenient for the next patch, without changing any of the semantics.
The new variables are not used yet except for their default state.

We also add some comments on the workings of diff_words_show() and
associated helper routines.

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
---
 diff.c |   86 +++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 69 insertions(+), 17 deletions(-)

diff --git a/diff.c b/diff.c
index d235482..f274bf5 100644
--- a/diff.c
+++ b/diff.c
@@ -316,11 +316,21 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 	return 0;
 }
 
+/* unused right now */
+enum diff_word_boundaries {
+	DIFF_WORD_UNDEF,
+	DIFF_WORD_BODY,
+	DIFF_WORD_END,
+	DIFF_WORD_SPACE,
+	DIFF_WORD_SKIP
+};
+
 struct diff_words_buffer {
 	mmfile_t text;
 	long alloc;
 	long current; /* output pointer */
 	int suppressed_newline;
+	enum diff_word_boundaries *boundaries;
 };
 
 static void diff_words_append(char *line, unsigned long len,
@@ -339,16 +349,23 @@ static void diff_words_append(char *line, unsigned long len,
 struct diff_words_data {
 	struct diff_words_buffer minus, plus;
 	FILE *file;
+	regex_t *word_regex; /* currently unused */
 };
 
-static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
+/*
+ * Print 'len' characters from the "real" diff data in 'buffer'.  Also
+ * returns how many characters were printed (currently always 'len').
+ * With 'suppress_newline', we remember a final newline instead of
+ * printing it.
+ */
+static int print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
 		int suppress_newline)
 {
 	const char *ptr;
 	int eol = 0;
 
 	if (len == 0)
-		return;
+		return len;
 
 	ptr  = buffer->text.ptr + buffer->current;
 	buffer->current += len;
@@ -368,18 +385,30 @@ static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, in
 		else
 			putc('\n', file);
 	}
+
+	/* we need to return how many chars to skip on the other side,
+	 * so account for the (held off) \n */
+	return len+eol;
 }
 
+/*
+ * Callback for word diff output
+ */
 static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 {
 	struct diff_words_data *diff_words = priv;
 
 	if (diff_words->minus.suppressed_newline) {
+		/* We completely drop a suppressed newline on the
+		 * minus side, if it is immediately followed by a plus
+		 * side output. This formats a word change right
+		 * before the end of line correctly */
 		if (line[0] != '+')
 			putc('\n', diff_words->file);
 		diff_words->minus.suppressed_newline = 0;
 	}
 
+	/* account for the [+- ] inserted by the line diff */
 	len--;
 	switch (line[0]) {
 		case '-':
@@ -391,8 +420,10 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 				   &diff_words->plus, len, DIFF_FILE_NEW, 0);
 			break;
 		case ' ':
-			print_word(diff_words->file,
-				   &diff_words->plus, len, DIFF_PLAIN, 0);
+			len = print_word(diff_words->file,
+					 &diff_words->plus, len, DIFF_PLAIN, 0);
+			/* skip the characters that were printed on
+			 * the other side, too */
 			diff_words->minus.current += len;
 			break;
 	}
@@ -409,22 +440,37 @@ static void diff_words_show(struct diff_words_data *diff_words)
 
 	memset(&xpp, 0, sizeof(xpp));
 	memset(&xecfg, 0, sizeof(xecfg));
-	minus.size = diff_words->minus.text.size;
-	minus.ptr = xmalloc(minus.size);
-	memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
-	for (i = 0; i < minus.size; i++)
-		if (isspace(minus.ptr[i]))
-			minus.ptr[i] = '\n';
-	diff_words->minus.current = 0;
 
-	plus.size = diff_words->plus.text.size;
-	plus.ptr = xmalloc(plus.size);
-	memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
-	for (i = 0; i < plus.size; i++)
-		if (isspace(plus.ptr[i]))
-			plus.ptr[i] = '\n';
+	/* currently always true */
+	if (!diff_words->word_regex) {
+		/*
+		 * "Simple" word diff: replace all space characters
+		 * with a newline.
+		 *
+		 * This groups together "words" of nonspaces on a line
+		 * each, which we then diff using the normal line-diff
+		 * mechanism.  It also has the nice property that
+		 * character counts/offsets stay the same.
+		 */
+		minus.size = diff_words->minus.text.size;
+		minus.ptr = xmalloc(minus.size);
+		memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
+		for (i = 0; i < minus.size; i++)
+			if (isspace(minus.ptr[i]))
+				minus.ptr[i] = '\n';
+
+		plus.size = diff_words->plus.text.size;
+		plus.ptr = xmalloc(plus.size);
+		memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
+		for (i = 0; i < plus.size; i++)
+			if (isspace(plus.ptr[i]))
+				plus.ptr[i] = '\n';
+	}
+	diff_words->minus.current = 0;
 	diff_words->plus.current = 0;
 
+	/* we want a minimal diff with enough context to run
+	 * everything into a single hunk */
 	xpp.flags = XDF_NEED_MINIMAL;
 	xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
 	xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
@@ -432,7 +478,12 @@ static void diff_words_show(struct diff_words_data *diff_words)
 	free(minus.ptr);
 	free(plus.ptr);
 	diff_words->minus.text.size = diff_words->plus.text.size = 0;
+	/* these two are currently free(NULL) */
+	free(diff_words->minus.boundaries);
+	free(diff_words->plus.boundaries);
 
+	/* do not forget about a possible final newline that was held
+	 * back */
 	if (diff_words->minus.suppressed_newline) {
 		putc('\n', diff_words->file);
 		diff_words->minus.suppressed_newline = 0;
@@ -461,6 +512,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
 
 		free (ecbdata->diff_words->minus.text.ptr);
 		free (ecbdata->diff_words->plus.text.ptr);
+		free(ecbdata->diff_words->word_regex);
 		free(ecbdata->diff_words);
 		ecbdata->diff_words = NULL;
 	}
-- 
1.6.1.269.g0769

  parent reply	other threads:[~2009-01-11 10:28 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-01-09  0:05 [RFC PATCH] make diff --color-words customizable Thomas Rast
2009-01-09  0:25 ` Johannes Schindelin
2009-01-09  0:50   ` Thomas Rast
2009-01-09 11:15     ` Johannes Schindelin
2009-01-09 11:59       ` [ILLUSTRATION PATCH] color-words: take an optional regular expression describing words Johannes Schindelin
2009-01-09 12:24         ` Thomas Rast
2009-01-09 13:05           ` Teemu Likonen
2009-01-10  0:57             ` [PATCH v2] make diff --color-words customizable Thomas Rast
2009-01-10  1:50               ` Jakub Narebski
2009-01-10 11:37                 ` Johannes Schindelin
2009-01-10 13:36                   ` Jakub Narebski
2009-01-10 14:08                     ` Johannes Schindelin
2009-01-12 23:59                       ` Jakub Narebski
2009-01-13  0:40                         ` Johannes Schindelin
2009-01-10 17:53                     ` Davide Libenzi
2009-01-13  0:52                       ` Jakub Narebski
2009-01-13 18:50                         ` Davide Libenzi
2009-01-10 10:49               ` Johannes Schindelin
2009-01-10 11:25                 ` Thomas Rast
2009-01-10 11:45                   ` Johannes Schindelin
2009-01-11  1:34                     ` Junio C Hamano
2009-01-11 10:27                       ` [PATCH v3 0/4] customizable --color-words Thomas Rast
2009-01-11 10:27                       ` Thomas Rast [this message]
2009-01-11 13:41                         ` [PATCH v3 1/4] word diff: comments, preparations for regex customization Johannes Schindelin
2009-01-11 19:49                         ` Johannes Schindelin
2009-01-11 22:19                         ` Junio C Hamano
2009-01-11 10:27                       ` [PATCH v3 2/4] word diff: customizable word splits Thomas Rast
2009-01-11 22:20                         ` Junio C Hamano
2009-01-11 10:27                       ` [PATCH v3 3/4] word diff: make regex configurable via attributes Thomas Rast
2009-01-11 23:20                         ` Junio C Hamano
2009-01-11 10:27                       ` [PATCH v3 4/4] word diff: test customizable word splits Thomas Rast
2009-01-09  9:53 ` [RFC PATCH] make diff --color-words customizable Jeff King
2009-01-09 11:18   ` Johannes Schindelin
2009-01-09 11:22     ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4aea85caafd38a058145c5769fe8a30ffdbd4d13.1231669012.git.trast@student.ethz.ch \
    --to=trast@student.ethz.ch \
    --cc=git@vger.kernel.org \
    --cc=johannes.schindelin@gmx.de \
    --cc=junio@pobox.com \
    --cc=tlikonen@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).