git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Thomas Rast <trast@student.ethz.ch>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <junio@pobox.com>,
	"Johannes Schindelin" <johannes.schindelin@gmx.de>,
	"Santi Béjar" <santi@agolina.net>,
	"Boyd Stephen Smith Jr." <bss@iguanasuicide.net>,
	"Teemu Likonen" <tlikonen@iki.fi>
Subject: [PATCH v4 4/7] color-words: take an optional regular expression describing words
Date: Sat, 17 Jan 2009 17:29:45 +0100	[thread overview]
Message-ID: <1232209788-10408-5-git-send-email-trast@student.ethz.ch> (raw)
In-Reply-To: <1232209788-10408-4-git-send-email-trast@student.ethz.ch>

From: Johannes Schindelin <johannes.schindelin@gmx.de>

In some applications, words are not delimited by white space.  To
allow for that, you can specify a regular expression describing
what makes a word with

	git diff --color-words='[A-Za-z0-9]+'

Note that words cannot contain newline characters.

As suggested by Thomas Rast, the words are the exact matches of the
regular expression.

Note that a regular expression beginning with a '^' will match only
a word at the beginning of the hunk, not a word at the beginning of
a line, and is probably not what you want.

This commit contains a quoting fix by Thomas Rast.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 Documentation/diff-options.txt |    6 +++-
 diff.c                         |   64 ++++++++++++++++++++++++++++++++++-----
 diff.h                         |    1 +
 t/t4034-diff-words.sh          |   57 +++++++++++++++++++++++++++++++++++
 4 files changed, 118 insertions(+), 10 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 43793d7..2c1fa4b 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -91,8 +91,12 @@ endif::git-format-patch[]
 	Turn off colored diff, even when the configuration file
 	gives the default to color output.
 
---color-words::
+--color-words[=regex]::
 	Show colored word diff, i.e. color words which have changed.
++
+Optionally, you can pass a regular expression that tells Git what the
+words are that you are looking for; The default is to interpret any
+stretch of non-whitespace as a word.
 
 --no-renames::
 	Turn off rename detection, even when the configuration
diff --git a/diff.c b/diff.c
index 37c886a..9fb3d0d 100644
--- a/diff.c
+++ b/diff.c
@@ -333,12 +333,14 @@ static void diff_words_append(char *line, unsigned long len,
 	len--;
 	memcpy(buffer->text.ptr + buffer->text.size, line, len);
 	buffer->text.size += len;
+	buffer->text.ptr[buffer->text.size] = '\0';
 }
 
 struct diff_words_data {
 	struct diff_words_buffer minus, plus;
 	const char *current_plus;
 	FILE *file;
+	regex_t *word_regex;
 };
 
 static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
@@ -382,17 +384,49 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 	diff_words->current_plus = plus_end;
 }
 
+/* This function starts looking at *begin, and returns 0 iff a word was found. */
+static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
+		int *begin, int *end)
+{
+	if (word_regex && *begin < buffer->size) {
+		regmatch_t match[1];
+		if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) {
+			char *p = memchr(buffer->ptr + *begin + match[0].rm_so,
+					'\n', match[0].rm_eo - match[0].rm_so);
+			*end = p ? p - buffer->ptr : match[0].rm_eo + *begin;
+			*begin += match[0].rm_so;
+			return *begin >= *end;
+		}
+		return -1;
+	}
+
+	/* find the next word */
+	while (*begin < buffer->size && isspace(buffer->ptr[*begin]))
+		(*begin)++;
+	if (*begin >= buffer->size)
+		return -1;
+
+	/* find the end of the word */
+	*end = *begin + 1;
+	while (*end < buffer->size && !isspace(buffer->ptr[*end]))
+		(*end)++;
+
+	return 0;
+}
+
 /*
  * This function splits the words in buffer->text, stores the list with
  * newline separator into out, and saves the offsets of the original words
  * in buffer->orig.
  */
-static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
+static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out,
+		regex_t *word_regex)
 {
 	int i, j;
+	long alloc = 0;
 
 	out->size = 0;
-	out->ptr = xmalloc(buffer->text.size);
+	out->ptr = NULL;
 
 	/* fake an empty "0th" word */
 	ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
@@ -400,11 +434,8 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
 	buffer->orig_nr = 1;
 
 	for (i = 0; i < buffer->text.size; i++) {
-		if (isspace(buffer->text.ptr[i]))
-			continue;
-		for (j = i + 1; j < buffer->text.size &&
-				!isspace(buffer->text.ptr[j]); j++)
-			; /* find the end of the word */
+		if (find_word_boundaries(&buffer->text, word_regex, &i, &j))
+			return;
 
 		/* store original boundaries */
 		ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
@@ -414,6 +445,7 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
 		buffer->orig_nr++;
 
 		/* store one word */
+		ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc);
 		memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
 		out->ptr[out->size + j - i] = '\n';
 		out->size += j - i + 1;
@@ -443,9 +475,10 @@ static void diff_words_show(struct diff_words_data *diff_words)
 
 	memset(&xpp, 0, sizeof(xpp));
 	memset(&xecfg, 0, sizeof(xecfg));
-	diff_words_fill(&diff_words->minus, &minus);
-	diff_words_fill(&diff_words->plus, &plus);
+	diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex);
+	diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex);
 	xpp.flags = XDF_NEED_MINIMAL;
+	/* as only the hunk header will be parsed, we need a 0-context */
 	xecfg.ctxlen = 0;
 	xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
 		      &xpp, &xecfg, &ecb);
@@ -484,6 +517,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
 		free (ecbdata->diff_words->minus.orig);
 		free (ecbdata->diff_words->plus.text.ptr);
 		free (ecbdata->diff_words->plus.orig);
+		free(ecbdata->diff_words->word_regex);
 		free(ecbdata->diff_words);
 		ecbdata->diff_words = NULL;
 	}
@@ -1506,6 +1540,14 @@ static void builtin_diff(const char *name_a,
 			ecbdata.diff_words =
 				xcalloc(1, sizeof(struct diff_words_data));
 			ecbdata.diff_words->file = o->file;
+			if (o->word_regex) {
+				ecbdata.diff_words->word_regex = (regex_t *)
+					xmalloc(sizeof(regex_t));
+				if (regcomp(ecbdata.diff_words->word_regex,
+						o->word_regex, REG_EXTENDED))
+					die ("Invalid regular expression: %s",
+							o->word_regex);
+			}
 		}
 		xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata,
 			      &xpp, &xecfg, &ecb);
@@ -2517,6 +2559,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_OPT_CLR(options, COLOR_DIFF);
 	else if (!strcmp(arg, "--color-words"))
 		options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
+	else if (!prefixcmp(arg, "--color-words=")) {
+		options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
+		options->word_regex = arg + 14;
+	}
 	else if (!strcmp(arg, "--exit-code"))
 		DIFF_OPT_SET(options, EXIT_WITH_STATUS);
 	else if (!strcmp(arg, "--quiet"))
diff --git a/diff.h b/diff.h
index 4d5a327..23cd90c 100644
--- a/diff.h
+++ b/diff.h
@@ -98,6 +98,7 @@ struct diff_options {
 
 	int stat_width;
 	int stat_name_width;
+	const char *word_regex;
 
 	/* this is set by diffcore for DIFF_FORMAT_PATCH */
 	int found_changes;
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index b22195f..4873486 100755
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh
@@ -63,4 +63,61 @@ test_expect_success 'word diff with runs of whitespace' '
 
 '
 
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 330b04f..5ed8eff 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1,3 +1,7 @@<RESET>
+h(4),<GREEN>hh<RESET>[44]
+<RESET>
+a = b + c<RESET>
+
+<GREEN>aa = a<RESET>
+
+<GREEN>aeff = aeff * ( aaa<RESET> )
+EOF
+
+test_expect_success 'word diff with a regular expression' '
+
+	word_diff --color-words="[a-z]+"
+
+'
+
+echo 'aaa (aaa)' > pre
+echo 'aaa (aaa) aaa' > post
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index c29453b..be22f37 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1 +1 @@<RESET>
+aaa (aaa) <GREEN>aaa<RESET>
+EOF
+
+test_expect_success 'test parsing words for newline' '
+
+	word_diff --color-words="a+"
+
+'
+
+echo '(:' > pre
+echo '(' > post
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 289cb9d..2d06f37 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1 +1 @@<RESET>
+(<RED>:<RESET>
+EOF
+
+test_expect_success 'test when words are only removed at the end' '
+
+	word_diff --color-words=.
+
+'
+
 test_done
-- 
1.6.1.315.g92577

  reply	other threads:[~2009-01-17 16:31 UTC|newest]

Thread overview: 109+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-01-11 19:58 [PATCH 0/4] refactor the --color-words to make it more hackable Johannes Schindelin
2009-01-11 19:59 ` [PATCH 1/4] Add color_fwrite(), a function coloring each line individually Johannes Schindelin
2009-01-11 22:43   ` Junio C Hamano
2009-01-11 23:49     ` Johannes Schindelin
2009-01-11 23:49       ` [PATCH v2 " Johannes Schindelin
2009-01-12  1:27         ` Jakub Narebski
2009-01-11 19:59 ` [PATCH 2/4] color-words: refactor word splitting and use ALLOC_GROW() Johannes Schindelin
2009-01-11 19:59 ` [PATCH 3/4] color-words: refactor to allow for 0-character word boundaries Johannes Schindelin
2009-01-11 23:08   ` Junio C Hamano
2009-01-11 23:38     ` Johannes Schindelin
2009-01-12  8:47   ` Thomas Rast
2009-01-12  9:36     ` Junio C Hamano
2009-01-11 20:00 ` [PATCH 4/4] color-words: take an optional regular expression describing words Johannes Schindelin
2009-01-11 21:53 ` [PATCH 0/4] refactor the --color-words to make it more hackable Thomas Rast
2009-01-11 23:02   ` Johannes Schindelin
2009-01-12  6:25     ` Thomas Rast
2009-01-14 13:00 ` Santi Béjar
2009-01-14 17:49   ` [PATCH take 3 0/4] color-words improvements Johannes Schindelin
2009-01-14 17:50     ` [PATCH 1/4] Add color_fwrite_lines(), a function coloring each line individually Johannes Schindelin
2009-01-14 17:50     ` [PATCH 2/4] color-words: refactor word splitting and use ALLOC_GROW() Johannes Schindelin
2009-01-14 17:51     ` [PATCH 3/4] color-words: change algorithm to allow for 0-character word boundaries Johannes Schindelin
2009-01-14 18:08       ` Johannes Schindelin
2009-01-14 17:51     ` [PATCH 4/4] color-words: take an optional regular expression describing words Johannes Schindelin
2009-01-14 19:55       ` Thomas Rast
2009-01-14 18:54     ` [PATCH take 3 0/4] color-words improvements Teemu Likonen
2009-01-14 18:57       ` Teemu Likonen
2009-01-14 19:28         ` Johannes Schindelin
2009-01-14 19:32           ` Johannes Schindelin
2009-01-14 20:44             ` [PATCH replacement for take 3 3/4] color-words: change algorithm to allow for 0-character word boundaries Johannes Schindelin
2009-01-14 20:46               ` [PATCH replacement for take 3 4/4] color-words: take an optional regular expression describing words Johannes Schindelin
2009-01-15  0:32                 ` Thomas Rast
2009-01-15  1:12                   ` Johannes Schindelin
2009-01-15  1:36                     ` Johannes Schindelin
2009-01-15  8:30                       ` Thomas Rast
2009-01-15 10:40                         ` Thomas Rast
2009-01-15 12:54                           ` Johannes Schindelin
2009-01-14 19:58       ` [PATCH take 3 0/4] color-words improvements Thomas Rast
2009-01-14 22:06         ` Johannes Schindelin
2009-01-14 22:11           ` Thomas Rast
2009-01-14 22:24           ` Boyd Stephen Smith Jr.
2009-01-15  4:56           ` Teemu Likonen
2009-01-15 12:41             ` Johannes Schindelin
2009-01-15 13:03               ` Teemu Likonen
2009-01-15 13:27                 ` Thomas Rast
2009-01-15 18:15               ` Junio C Hamano
2009-01-15 19:25                 ` Johannes Schindelin
2009-01-16  0:10                   ` Santi Béjar
2009-01-16  1:37                     ` Junio C Hamano
2009-01-16  1:42                     ` Boyd Stephen Smith Jr.
2009-01-16  1:55                     ` Johannes Schindelin
2009-01-16  9:02                       ` Santi Béjar
2009-01-16 11:57                         ` Johannes Schindelin
2009-01-16 12:01                         ` Santi Béjar
2009-01-16 12:40                           ` Johannes Schindelin
2009-01-16 19:04                           ` Thomas Rast
2009-01-16 21:09                             ` Johannes Schindelin
2009-01-17 16:29                               ` [PATCH v4 0/7] customizable --color-words Thomas Rast
2009-01-17 16:29                                 ` [PATCH v4 1/7] Add color_fwrite_lines(), a function coloring each line individually Thomas Rast
2009-01-17 16:29                                   ` [PATCH v4 2/7] color-words: refactor word splitting and use ALLOC_GROW() Thomas Rast
2009-01-17 16:29                                     ` [PATCH v4 3/7] color-words: change algorithm to allow for 0-character word boundaries Thomas Rast
2009-01-17 16:29                                       ` Thomas Rast [this message]
2009-01-17 16:29                                         ` [PATCH v4 5/7] color-words: enable REG_NEWLINE to help user Thomas Rast
2009-01-17 16:29                                           ` [PATCH v4 6/7] color-words: expand docs with precise semantics Thomas Rast
2009-01-17 16:29                                             ` [PATCH v4 7/7] color-words: make regex configurable via attributes Thomas Rast
2009-01-18 15:05                                 ` [PATCH v4 0/7] customizable --color-words Santi Béjar
2009-01-18 15:29                                   ` Santi Béjar
2009-01-19 22:47                                 ` Santi Béjar
2009-01-19 23:35                                   ` Johannes Schindelin
2009-01-20  2:17                                     ` [PATCH] Add tests for diff.color-words configuration option Boyd Stephen Smith Jr.
2009-01-20  3:45                                       ` [PATCH] diff: Support diff.color-words config option Boyd Stephen Smith Jr.
2009-01-20  6:59                                         ` Junio C Hamano
2009-01-20 17:42                                           ` Markus Heidelberg
2009-01-20 17:58                                             ` Boyd Stephen Smith Jr.
2009-01-20 21:08                                             ` Johannes Schindelin
2009-01-21 10:27                                               ` Junio C Hamano
2009-01-21 19:37                                               ` Markus Heidelberg
2009-01-20 10:02                                         ` Johannes Schindelin
2009-01-20 16:52                                           ` Boyd Stephen Smith Jr.
2009-01-20 17:14                                             ` Johannes Schindelin
2009-01-20 17:09                                           ` Junio C Hamano
2009-01-20 17:28                                             ` Johannes Schindelin
2009-01-20 20:27                                               ` Junio C Hamano
2009-01-20 21:02                                                 ` Johannes Schindelin
2009-01-21  3:46                                           ` [PATCH] color-words: " Boyd Stephen Smith Jr.
2009-01-21  4:59                                             ` [PATCH] Change the spelling of "wordregex" Boyd Stephen Smith Jr.
2009-01-21  8:26                                               ` Johannes Schindelin
2009-01-21  9:22                                                 ` Thomas Rast
2009-01-21 15:33                                                 ` Boyd Stephen Smith Jr.
2009-01-21  8:25                                             ` [PATCH] color-words: Support diff.color-words config option Johannes Schindelin
2009-01-21 16:09                                               ` Boyd Stephen Smith Jr.
2009-01-21 10:27                                             ` [PATCH] color-words: Support diff.wordregex " Junio C Hamano
2009-01-20 14:38                                         ` [PATCH] diff: Support diff.color-words " Jakub Narebski
2009-01-20  9:58                                       ` [PATCH] Add tests for diff.color-words configuration option Johannes Schindelin
2009-01-20 16:34                                         ` Boyd Stephen Smith Jr.
2009-01-20 16:54                                           ` Johannes Schindelin
2009-01-16 16:11                         ` [PATCH take 3 0/4] color-words improvements Boyd Stephen Smith Jr.
2009-01-14 19:46     ` [PATCH] color-words: make regex configurable via attributes Thomas Rast
2009-01-14 20:12       ` Johannes Schindelin
2009-01-14 20:17         ` Thomas Rast
2009-01-14 22:26         ` [PATCH 1/4] color-words: fix quoting in t4034 Thomas Rast
2009-01-14 22:41           ` Johannes Schindelin
2009-01-14 22:26         ` [PATCH 2/4] color-words: enable REG_NEWLINE to help user Thomas Rast
2009-01-14 22:26         ` [PATCH 3/4] color-words: expand docs with precise semantics Thomas Rast
2009-01-14 22:26         ` [PATCH 4/4] color-words: make regex configurable via attributes Thomas Rast
2009-01-15  1:33           ` Johannes Schindelin
2009-01-15  1:43             ` Johannes Schindelin
2009-01-14 20:04     ` [PATCH take 3 0/4] color-words improvements Thomas Rast
2009-01-14 21:07       ` Johannes Schindelin
2009-01-14 22:37         ` Thomas Rast

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1232209788-10408-5-git-send-email-trast@student.ethz.ch \
    --to=trast@student.ethz.ch \
    --cc=bss@iguanasuicide.net \
    --cc=git@vger.kernel.org \
    --cc=johannes.schindelin@gmx.de \
    --cc=junio@pobox.com \
    --cc=santi@agolina.net \
    --cc=tlikonen@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).