From: Thomas Rast <trast@student.ethz.ch>
To: git@vger.kernel.org
Cc: Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: [RFC PATCH] make diff --color-words customizable
Date: Fri, 9 Jan 2009 01:05:05 +0100 [thread overview]
Message-ID: <1231459505-14395-1-git-send-email-trast@student.ethz.ch> (raw)
Allows for user-configurable word splits when using --color-words.
This can make the diff more readable if the regex is configured
according to the language of the file.
For now the (POSIX extended) regex must be set via the environment
variable GIT_DIFF_WORDS_REGEX. Each (non-overlapping) match of the regex
is considered a word. Any characters not matched are considered
whitespace. For example, for C try
GIT_DIFF_WORDS_REGEX='[0-9]+|[a-zA-Z_][a-zA-Z0-9_]*|(\+|-|&|\|){1,2}|\S'
and for TeX try
GIT_DIFF_WORDS_REGEX='\\[a-zA-Z@]+ *|\{|\}|\\.|[^\{} [:space:]]+'
Signed-off-by: Thomas Rast <trast@student.ethz.ch>
---
Word diff becomes much more useful, especially with TeX, where it is
common to run together \sequences\of\commands\like\this, which the
current --color-words treats as a single word.
Apart from possible bugs, the main issue is: where should I put the
configuration for this?
diff.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 127 insertions(+), 15 deletions(-)
diff --git a/diff.c b/diff.c
index d235482..c1e24de 100644
--- a/diff.c
+++ b/diff.c
@@ -321,6 +321,7 @@ struct diff_words_buffer {
long alloc;
long current; /* output pointer */
int suppressed_newline;
+ enum diff_word_boundaries *boundaries;
};
static void diff_words_append(char *line, unsigned long len,
@@ -336,21 +337,35 @@ static void diff_words_append(char *line, unsigned long len,
buffer->text.size += len;
}
+enum diff_word_boundaries {
+ DIFF_WORD_CONT,
+ DIFF_WORD_START,
+ DIFF_WORD_SPACE
+};
+
+
struct diff_words_data {
struct diff_words_buffer minus, plus;
FILE *file;
+ enum diff_word_boundaries *minus_boundaries, *plus_boundaries;
};
-static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
+static int print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
int suppress_newline)
{
const char *ptr;
int eol = 0;
if (len == 0)
- return;
+ return len;
ptr = buffer->text.ptr + buffer->current;
+
+ if (buffer->boundaries[buffer->current+len-1] == DIFF_WORD_START) {
+ buffer->boundaries[buffer->current+len-1] = DIFF_WORD_CONT;
+ len--;
+ }
+
buffer->current += len;
if (ptr[len - 1] == '\n') {
@@ -368,6 +383,8 @@ static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, in
else
putc('\n', file);
}
+
+ return len;
}
static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
@@ -391,13 +408,79 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
&diff_words->plus, len, DIFF_FILE_NEW, 0);
break;
case ' ':
- print_word(diff_words->file,
- &diff_words->plus, len, DIFF_PLAIN, 0);
+ len = print_word(diff_words->file,
+ &diff_words->plus, len, DIFF_PLAIN, 0);
diff_words->minus.current += len;
break;
}
}
+static char *worddiff_default = "\\S+";
+static regex_t worddiff_regex;
+static int worddiff_regex_compiled = 0;
+
+static int scan_word_boundaries(struct diff_words_buffer *buf)
+{
+ enum diff_word_boundaries *boundaries = buf->boundaries;
+ char *text = buf->text.ptr;
+ int len = buf->text.size;
+
+ int i = 0;
+ int count = 0;
+ int ret;
+ regmatch_t matches[1];
+ int offset, wordlen;
+ char *strz;
+
+ if (!text)
+ return 0;
+
+ if (!worddiff_regex_compiled) {
+ char *wd_pat = getenv("GIT_DIFF_WORDS_REGEX");
+ if (!wd_pat)
+ wd_pat = worddiff_default;
+ ret = regcomp(&worddiff_regex, wd_pat, REG_EXTENDED);
+ if (ret) {
+ char errbuf[1024];
+ regerror(ret, &worddiff_regex, errbuf, 1024);
+ die("word diff regex failed to compile: '%s': %s",
+ wd_pat, errbuf);
+ }
+ worddiff_regex_compiled = 1;
+ }
+
+ strz = xmalloc(len+1);
+ memcpy(strz, text, len);
+ strz[len] = '\0';
+
+ while (i < len) {
+ ret = regexec(&worddiff_regex, strz+i, 1, matches, 0);
+ if (ret == REG_NOMATCH) {
+ /* the rest is whitespace */
+ while (i < len)
+ boundaries[i++] = DIFF_WORD_SPACE;
+ break;
+ }
+
+ offset = matches[0].rm_so;
+ while (offset-- > 0 && i < len)
+ boundaries[i++] = DIFF_WORD_SPACE;
+
+ wordlen = matches[0].rm_eo - matches[0].rm_so;
+ if (wordlen-- > 0 && i < len) {
+ boundaries[i++] = DIFF_WORD_START;
+ count++;
+ }
+ while (wordlen-- > 0 && i < len)
+ boundaries[i++] = DIFF_WORD_CONT;
+ }
+
+ free(strz);
+
+ return count;
+}
+
+
/* this executes the word diff on the accumulated buffers */
static void diff_words_show(struct diff_words_data *diff_words)
{
@@ -406,23 +489,50 @@ static void diff_words_show(struct diff_words_data *diff_words)
xdemitcb_t ecb;
mmfile_t minus, plus;
int i;
+ char *p;
+ int bcount;
memset(&xpp, 0, sizeof(xpp));
memset(&xecfg, 0, sizeof(xecfg));
- minus.size = diff_words->minus.text.size;
- minus.ptr = xmalloc(minus.size);
- memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
- for (i = 0; i < minus.size; i++)
- if (isspace(minus.ptr[i]))
- minus.ptr[i] = '\n';
+
+ diff_words->minus.boundaries = xmalloc(diff_words->minus.text.size * sizeof(enum diff_word_boundaries));
+ bcount = scan_word_boundaries(&diff_words->minus);
+ minus.size = diff_words->minus.text.size + bcount;
+ minus.ptr = xmalloc(minus.size + bcount);
+ p = minus.ptr;
+ for (i = 0; i < diff_words->minus.text.size; i++) {
+ switch (diff_words->minus.boundaries[i]) {
+ case DIFF_WORD_START:
+ *p++ = '\n';
+ /* fall through */
+ case DIFF_WORD_CONT:
+ *p++ = diff_words->minus.text.ptr[i];
+ break;
+ case DIFF_WORD_SPACE:
+ *p++ = '\n';
+ break;
+ }
+ }
diff_words->minus.current = 0;
- plus.size = diff_words->plus.text.size;
+ diff_words->plus.boundaries = xmalloc(diff_words->plus.text.size * sizeof(enum diff_word_boundaries));
+ bcount = scan_word_boundaries(&diff_words->plus);
+ plus.size = diff_words->plus.text.size + bcount;
plus.ptr = xmalloc(plus.size);
- memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
- for (i = 0; i < plus.size; i++)
- if (isspace(plus.ptr[i]))
- plus.ptr[i] = '\n';
+ p = plus.ptr;
+ for (i = 0; i < diff_words->plus.text.size; i++) {
+ switch (diff_words->plus.boundaries[i]) {
+ case DIFF_WORD_START:
+ *p++ = '\n';
+ /* fall through */
+ case DIFF_WORD_CONT:
+ *p++ = diff_words->plus.text.ptr[i];
+ break;
+ case DIFF_WORD_SPACE:
+ *p++ = '\n';
+ break;
+ }
+ }
diff_words->plus.current = 0;
xpp.flags = XDF_NEED_MINIMAL;
@@ -432,6 +542,8 @@ static void diff_words_show(struct diff_words_data *diff_words)
free(minus.ptr);
free(plus.ptr);
diff_words->minus.text.size = diff_words->plus.text.size = 0;
+ free(diff_words->minus.boundaries);
+ free(diff_words->plus.boundaries);
if (diff_words->minus.suppressed_newline) {
putc('\n', diff_words->file);
--
tg: (c123b7c..) t/word-diff-regex (depends on: origin/master)
next reply other threads:[~2009-01-09 0:06 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-01-09 0:05 Thomas Rast [this message]
2009-01-09 0:25 ` [RFC PATCH] make diff --color-words customizable Johannes Schindelin
2009-01-09 0:50 ` Thomas Rast
2009-01-09 11:15 ` Johannes Schindelin
2009-01-09 11:59 ` [ILLUSTRATION PATCH] color-words: take an optional regular expression describing words Johannes Schindelin
2009-01-09 12:24 ` Thomas Rast
2009-01-09 13:05 ` Teemu Likonen
2009-01-10 0:57 ` [PATCH v2] make diff --color-words customizable Thomas Rast
2009-01-10 1:50 ` Jakub Narebski
2009-01-10 11:37 ` Johannes Schindelin
2009-01-10 13:36 ` Jakub Narebski
2009-01-10 14:08 ` Johannes Schindelin
2009-01-12 23:59 ` Jakub Narebski
2009-01-13 0:40 ` Johannes Schindelin
2009-01-10 17:53 ` Davide Libenzi
2009-01-13 0:52 ` Jakub Narebski
2009-01-13 18:50 ` Davide Libenzi
2009-01-10 10:49 ` Johannes Schindelin
2009-01-10 11:25 ` Thomas Rast
2009-01-10 11:45 ` Johannes Schindelin
2009-01-11 1:34 ` Junio C Hamano
2009-01-11 10:27 ` [PATCH v3 0/4] customizable --color-words Thomas Rast
2009-01-11 10:27 ` [PATCH v3 1/4] word diff: comments, preparations for regex customization Thomas Rast
2009-01-11 13:41 ` Johannes Schindelin
2009-01-11 19:49 ` Johannes Schindelin
2009-01-11 22:19 ` Junio C Hamano
2009-01-11 10:27 ` [PATCH v3 2/4] word diff: customizable word splits Thomas Rast
2009-01-11 22:20 ` Junio C Hamano
2009-01-11 10:27 ` [PATCH v3 3/4] word diff: make regex configurable via attributes Thomas Rast
2009-01-11 23:20 ` Junio C Hamano
2009-01-11 10:27 ` [PATCH v3 4/4] word diff: test customizable word splits Thomas Rast
2009-01-09 9:53 ` [RFC PATCH] make diff --color-words customizable Jeff King
2009-01-09 11:18 ` Johannes Schindelin
2009-01-09 11:22 ` Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1231459505-14395-1-git-send-email-trast@student.ethz.ch \
--to=trast@student.ethz.ch \
--cc=git@vger.kernel.org \
--cc=johannes.schindelin@gmx.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).