From: Thomas Rast <trast@student.ethz.ch>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <junio@pobox.com>,
"Johannes Schindelin" <johannes.schindelin@gmx.de>,
"Santi Béjar" <santi@agolina.net>,
"Boyd Stephen Smith Jr." <bss@iguanasuicide.net>,
"Teemu Likonen" <tlikonen@iki.fi>
Subject: [PATCH v4 3/7] color-words: change algorithm to allow for 0-character word boundaries
Date: Sat, 17 Jan 2009 17:29:44 +0100 [thread overview]
Message-ID: <1232209788-10408-4-git-send-email-trast@student.ethz.ch> (raw)
In-Reply-To: <1232209788-10408-3-git-send-email-trast@student.ethz.ch>
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Up until now, the color-words code assumed that word boundaries are
identical to white space characters.
Therefore, it could get away with a very simple scheme: it copied the
hunks, substituted newlines for each white space character, called
libxdiff with the processed text, and then identified the text to
output by the offsets (which agreed since the original text had the
same length).
This code was ugly, for a number of reasons:
- it was impossible to introduce 0-character word boundaries,
- we had to print everything word by word, and
- the code needed extra special handling of newlines in the removed part.
Fix all of these issues by processing the text such that
- we build word lists, separated by newlines,
- we remember the original offsets for every word, and
- after calling libxdiff on the word lists, we parse the hunk headers and
find the corresponding offsets, and then
- we print the removed/added parts in one go.
The pre and post samples in the test were provided by Santi Béjar.
Note that there is some strange special handling of hunk headers where
one line range is 0 due to POSIX: in this case, the start is one too
low. In other words, a hunk header '@@ -1,0 +2 @@' actually means that
the line must be added after the _second_ line of the pre text, _not_
the first.
Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
diff.c | 161 ++++++++++++++++++++++++++++---------------------
t/t4034-diff-words.sh | 66 ++++++++++++++++++++
2 files changed, 159 insertions(+), 68 deletions(-)
create mode 100755 t/t4034-diff-words.sh
diff --git a/diff.c b/diff.c
index c111eef..37c886a 100644
--- a/diff.c
+++ b/diff.c
@@ -319,8 +319,10 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
struct diff_words_buffer {
mmfile_t text;
long alloc;
- long current; /* output pointer */
- int suppressed_newline;
+ struct diff_words_orig {
+ const char *begin, *end;
+ } *orig;
+ int orig_nr, orig_alloc;
};
static void diff_words_append(char *line, unsigned long len,
@@ -335,80 +337,89 @@ static void diff_words_append(char *line, unsigned long len,
struct diff_words_data {
struct diff_words_buffer minus, plus;
+ const char *current_plus;
FILE *file;
};
-static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
- int suppress_newline)
+static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
{
- const char *ptr;
- int eol = 0;
+ struct diff_words_data *diff_words = priv;
+ int minus_first, minus_len, plus_first, plus_len;
+ const char *minus_begin, *minus_end, *plus_begin, *plus_end;
- if (len == 0)
+ if (line[0] != '@' || parse_hunk_header(line, len,
+ &minus_first, &minus_len, &plus_first, &plus_len))
return;
- ptr = buffer->text.ptr + buffer->current;
- buffer->current += len;
+ /* POSIX requires that first be decremented by one if len == 0... */
+ if (minus_len) {
+ minus_begin = diff_words->minus.orig[minus_first].begin;
+ minus_end =
+ diff_words->minus.orig[minus_first + minus_len - 1].end;
+ } else
+ minus_begin = minus_end =
+ diff_words->minus.orig[minus_first].end;
- if (ptr[len - 1] == '\n') {
- eol = 1;
- len--;
- }
+ if (plus_len) {
+ plus_begin = diff_words->plus.orig[plus_first].begin;
+ plus_end = diff_words->plus.orig[plus_first + plus_len - 1].end;
+ } else
+ plus_begin = plus_end = diff_words->plus.orig[plus_first].end;
- fputs(diff_get_color(1, color), file);
- fwrite(ptr, len, 1, file);
- fputs(diff_get_color(1, DIFF_RESET), file);
+ if (diff_words->current_plus != plus_begin)
+ fwrite(diff_words->current_plus,
+ plus_begin - diff_words->current_plus, 1,
+ diff_words->file);
+ if (minus_begin != minus_end)
+ color_fwrite_lines(diff_words->file,
+ diff_get_color(1, DIFF_FILE_OLD),
+ minus_end - minus_begin, minus_begin);
+ if (plus_begin != plus_end)
+ color_fwrite_lines(diff_words->file,
+ diff_get_color(1, DIFF_FILE_NEW),
+ plus_end - plus_begin, plus_begin);
- if (eol) {
- if (suppress_newline)
- buffer->suppressed_newline = 1;
- else
- putc('\n', file);
- }
-}
-
-static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
-{
- struct diff_words_data *diff_words = priv;
-
- if (diff_words->minus.suppressed_newline) {
- if (line[0] != '+')
- putc('\n', diff_words->file);
- diff_words->minus.suppressed_newline = 0;
- }
-
- len--;
- switch (line[0]) {
- case '-':
- print_word(diff_words->file,
- &diff_words->minus, len, DIFF_FILE_OLD, 1);
- break;
- case '+':
- print_word(diff_words->file,
- &diff_words->plus, len, DIFF_FILE_NEW, 0);
- break;
- case ' ':
- print_word(diff_words->file,
- &diff_words->plus, len, DIFF_PLAIN, 0);
- diff_words->minus.current += len;
- break;
- }
+ diff_words->current_plus = plus_end;
}
/*
- * This function splits the words in buffer->text, and stores the list with
- * newline separator into out.
+ * This function splits the words in buffer->text, stores the list with
+ * newline separator into out, and saves the offsets of the original words
+ * in buffer->orig.
*/
static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
{
- int i;
- out->size = buffer->text.size;
- out->ptr = xmalloc(out->size);
- memcpy(out->ptr, buffer->text.ptr, out->size);
- for (i = 0; i < out->size; i++)
- if (isspace(out->ptr[i]))
- out->ptr[i] = '\n';
- buffer->current = 0;
+ int i, j;
+
+ out->size = 0;
+ out->ptr = xmalloc(buffer->text.size);
+
+ /* fake an empty "0th" word */
+ ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
+ buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr;
+ buffer->orig_nr = 1;
+
+ for (i = 0; i < buffer->text.size; i++) {
+ if (isspace(buffer->text.ptr[i]))
+ continue;
+ for (j = i + 1; j < buffer->text.size &&
+ !isspace(buffer->text.ptr[j]); j++)
+ ; /* find the end of the word */
+
+ /* store original boundaries */
+ ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
+ buffer->orig_alloc);
+ buffer->orig[buffer->orig_nr].begin = buffer->text.ptr + i;
+ buffer->orig[buffer->orig_nr].end = buffer->text.ptr + j;
+ buffer->orig_nr++;
+
+ /* store one word */
+ memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
+ out->ptr[out->size + j - i] = '\n';
+ out->size += j - i + 1;
+
+ i = j - 1;
+ }
}
/* this executes the word diff on the accumulated buffers */
@@ -419,22 +430,34 @@ static void diff_words_show(struct diff_words_data *diff_words)
xdemitcb_t ecb;
mmfile_t minus, plus;
+ /* special case: only removal */
+ if (!diff_words->plus.text.size) {
+ color_fwrite_lines(diff_words->file,
+ diff_get_color(1, DIFF_FILE_OLD),
+ diff_words->minus.text.size, diff_words->minus.text.ptr);
+ diff_words->minus.text.size = 0;
+ return;
+ }
+
+ diff_words->current_plus = diff_words->plus.text.ptr;
+
memset(&xpp, 0, sizeof(xpp));
memset(&xecfg, 0, sizeof(xecfg));
diff_words_fill(&diff_words->minus, &minus);
diff_words_fill(&diff_words->plus, &plus);
xpp.flags = XDF_NEED_MINIMAL;
- xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
+ xecfg.ctxlen = 0;
xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
&xpp, &xecfg, &ecb);
free(minus.ptr);
free(plus.ptr);
+ if (diff_words->current_plus != diff_words->plus.text.ptr +
+ diff_words->plus.text.size)
+ fwrite(diff_words->current_plus,
+ diff_words->plus.text.ptr + diff_words->plus.text.size
+ - diff_words->current_plus, 1,
+ diff_words->file);
diff_words->minus.text.size = diff_words->plus.text.size = 0;
-
- if (diff_words->minus.suppressed_newline) {
- putc('\n', diff_words->file);
- diff_words->minus.suppressed_newline = 0;
- }
}
typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
@@ -458,7 +481,9 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
diff_words_show(ecbdata->diff_words);
free (ecbdata->diff_words->minus.text.ptr);
+ free (ecbdata->diff_words->minus.orig);
free (ecbdata->diff_words->plus.text.ptr);
+ free (ecbdata->diff_words->plus.orig);
free(ecbdata->diff_words);
ecbdata->diff_words = NULL;
}
diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
new file mode 100755
index 0000000..b22195f
--- /dev/null
+++ b/t/t4034-diff-words.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+
+test_description='word diff colors'
+
+. ./test-lib.sh
+
+test_expect_success setup '
+
+ git config diff.color.old red
+ git config diff.color.new green
+
+'
+
+decrypt_color () {
+ sed \
+ -e 's/.\[1m/<WHITE>/g' \
+ -e 's/.\[31m/<RED>/g' \
+ -e 's/.\[32m/<GREEN>/g' \
+ -e 's/.\[36m/<BROWN>/g' \
+ -e 's/.\[m/<RESET>/g'
+}
+
+word_diff () {
+ test_must_fail git diff --no-index "$@" pre post > output &&
+ decrypt_color < output > output.decrypted &&
+ test_cmp expect output.decrypted
+}
+
+cat > pre <<\EOF
+h(4)
+
+a = b + c
+EOF
+
+cat > post <<\EOF
+h(4),hh[44]
+
+a = b + c
+
+aa = a
+
+aeff = aeff * ( aaa )
+EOF
+
+cat > expect <<\EOF
+<WHITE>diff --git a/pre b/post<RESET>
+<WHITE>index 330b04f..5ed8eff 100644<RESET>
+<WHITE>--- a/pre<RESET>
+<WHITE>+++ b/post<RESET>
+<BROWN>@@ -1,3 +1,7 @@<RESET>
+<RED>h(4)<RESET><GREEN>h(4),hh[44]<RESET>
+<RESET>
+a = b + c<RESET>
+
+<GREEN>aa = a<RESET>
+
+<GREEN>aeff = aeff * ( aaa )<RESET>
+EOF
+
+test_expect_success 'word diff with runs of whitespace' '
+
+ word_diff --color-words
+
+'
+
+test_done
--
1.6.1.315.g92577
next prev parent reply other threads:[~2009-01-17 16:31 UTC|newest]
Thread overview: 109+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-01-11 19:58 [PATCH 0/4] refactor the --color-words to make it more hackable Johannes Schindelin
2009-01-11 19:59 ` [PATCH 1/4] Add color_fwrite(), a function coloring each line individually Johannes Schindelin
2009-01-11 22:43 ` Junio C Hamano
2009-01-11 23:49 ` Johannes Schindelin
2009-01-11 23:49 ` [PATCH v2 " Johannes Schindelin
2009-01-12 1:27 ` Jakub Narebski
2009-01-11 19:59 ` [PATCH 2/4] color-words: refactor word splitting and use ALLOC_GROW() Johannes Schindelin
2009-01-11 19:59 ` [PATCH 3/4] color-words: refactor to allow for 0-character word boundaries Johannes Schindelin
2009-01-11 23:08 ` Junio C Hamano
2009-01-11 23:38 ` Johannes Schindelin
2009-01-12 8:47 ` Thomas Rast
2009-01-12 9:36 ` Junio C Hamano
2009-01-11 20:00 ` [PATCH 4/4] color-words: take an optional regular expression describing words Johannes Schindelin
2009-01-11 21:53 ` [PATCH 0/4] refactor the --color-words to make it more hackable Thomas Rast
2009-01-11 23:02 ` Johannes Schindelin
2009-01-12 6:25 ` Thomas Rast
2009-01-14 13:00 ` Santi Béjar
2009-01-14 17:49 ` [PATCH take 3 0/4] color-words improvements Johannes Schindelin
2009-01-14 17:50 ` [PATCH 1/4] Add color_fwrite_lines(), a function coloring each line individually Johannes Schindelin
2009-01-14 17:50 ` [PATCH 2/4] color-words: refactor word splitting and use ALLOC_GROW() Johannes Schindelin
2009-01-14 17:51 ` [PATCH 3/4] color-words: change algorithm to allow for 0-character word boundaries Johannes Schindelin
2009-01-14 18:08 ` Johannes Schindelin
2009-01-14 17:51 ` [PATCH 4/4] color-words: take an optional regular expression describing words Johannes Schindelin
2009-01-14 19:55 ` Thomas Rast
2009-01-14 18:54 ` [PATCH take 3 0/4] color-words improvements Teemu Likonen
2009-01-14 18:57 ` Teemu Likonen
2009-01-14 19:28 ` Johannes Schindelin
2009-01-14 19:32 ` Johannes Schindelin
2009-01-14 20:44 ` [PATCH replacement for take 3 3/4] color-words: change algorithm to allow for 0-character word boundaries Johannes Schindelin
2009-01-14 20:46 ` [PATCH replacement for take 3 4/4] color-words: take an optional regular expression describing words Johannes Schindelin
2009-01-15 0:32 ` Thomas Rast
2009-01-15 1:12 ` Johannes Schindelin
2009-01-15 1:36 ` Johannes Schindelin
2009-01-15 8:30 ` Thomas Rast
2009-01-15 10:40 ` Thomas Rast
2009-01-15 12:54 ` Johannes Schindelin
2009-01-14 19:58 ` [PATCH take 3 0/4] color-words improvements Thomas Rast
2009-01-14 22:06 ` Johannes Schindelin
2009-01-14 22:11 ` Thomas Rast
2009-01-14 22:24 ` Boyd Stephen Smith Jr.
2009-01-15 4:56 ` Teemu Likonen
2009-01-15 12:41 ` Johannes Schindelin
2009-01-15 13:03 ` Teemu Likonen
2009-01-15 13:27 ` Thomas Rast
2009-01-15 18:15 ` Junio C Hamano
2009-01-15 19:25 ` Johannes Schindelin
2009-01-16 0:10 ` Santi Béjar
2009-01-16 1:37 ` Junio C Hamano
2009-01-16 1:42 ` Boyd Stephen Smith Jr.
2009-01-16 1:55 ` Johannes Schindelin
2009-01-16 9:02 ` Santi Béjar
2009-01-16 11:57 ` Johannes Schindelin
2009-01-16 12:01 ` Santi Béjar
2009-01-16 12:40 ` Johannes Schindelin
2009-01-16 19:04 ` Thomas Rast
2009-01-16 21:09 ` Johannes Schindelin
2009-01-17 16:29 ` [PATCH v4 0/7] customizable --color-words Thomas Rast
2009-01-17 16:29 ` [PATCH v4 1/7] Add color_fwrite_lines(), a function coloring each line individually Thomas Rast
2009-01-17 16:29 ` [PATCH v4 2/7] color-words: refactor word splitting and use ALLOC_GROW() Thomas Rast
2009-01-17 16:29 ` Thomas Rast [this message]
2009-01-17 16:29 ` [PATCH v4 4/7] color-words: take an optional regular expression describing words Thomas Rast
2009-01-17 16:29 ` [PATCH v4 5/7] color-words: enable REG_NEWLINE to help user Thomas Rast
2009-01-17 16:29 ` [PATCH v4 6/7] color-words: expand docs with precise semantics Thomas Rast
2009-01-17 16:29 ` [PATCH v4 7/7] color-words: make regex configurable via attributes Thomas Rast
2009-01-18 15:05 ` [PATCH v4 0/7] customizable --color-words Santi Béjar
2009-01-18 15:29 ` Santi Béjar
2009-01-19 22:47 ` Santi Béjar
2009-01-19 23:35 ` Johannes Schindelin
2009-01-20 2:17 ` [PATCH] Add tests for diff.color-words configuration option Boyd Stephen Smith Jr.
2009-01-20 3:45 ` [PATCH] diff: Support diff.color-words config option Boyd Stephen Smith Jr.
2009-01-20 6:59 ` Junio C Hamano
2009-01-20 17:42 ` Markus Heidelberg
2009-01-20 17:58 ` Boyd Stephen Smith Jr.
2009-01-20 21:08 ` Johannes Schindelin
2009-01-21 10:27 ` Junio C Hamano
2009-01-21 19:37 ` Markus Heidelberg
2009-01-20 10:02 ` Johannes Schindelin
2009-01-20 16:52 ` Boyd Stephen Smith Jr.
2009-01-20 17:14 ` Johannes Schindelin
2009-01-20 17:09 ` Junio C Hamano
2009-01-20 17:28 ` Johannes Schindelin
2009-01-20 20:27 ` Junio C Hamano
2009-01-20 21:02 ` Johannes Schindelin
2009-01-21 3:46 ` [PATCH] color-words: " Boyd Stephen Smith Jr.
2009-01-21 4:59 ` [PATCH] Change the spelling of "wordregex" Boyd Stephen Smith Jr.
2009-01-21 8:26 ` Johannes Schindelin
2009-01-21 9:22 ` Thomas Rast
2009-01-21 15:33 ` Boyd Stephen Smith Jr.
2009-01-21 8:25 ` [PATCH] color-words: Support diff.color-words config option Johannes Schindelin
2009-01-21 16:09 ` Boyd Stephen Smith Jr.
2009-01-21 10:27 ` [PATCH] color-words: Support diff.wordregex " Junio C Hamano
2009-01-20 14:38 ` [PATCH] diff: Support diff.color-words " Jakub Narebski
2009-01-20 9:58 ` [PATCH] Add tests for diff.color-words configuration option Johannes Schindelin
2009-01-20 16:34 ` Boyd Stephen Smith Jr.
2009-01-20 16:54 ` Johannes Schindelin
2009-01-16 16:11 ` [PATCH take 3 0/4] color-words improvements Boyd Stephen Smith Jr.
2009-01-14 19:46 ` [PATCH] color-words: make regex configurable via attributes Thomas Rast
2009-01-14 20:12 ` Johannes Schindelin
2009-01-14 20:17 ` Thomas Rast
2009-01-14 22:26 ` [PATCH 1/4] color-words: fix quoting in t4034 Thomas Rast
2009-01-14 22:41 ` Johannes Schindelin
2009-01-14 22:26 ` [PATCH 2/4] color-words: enable REG_NEWLINE to help user Thomas Rast
2009-01-14 22:26 ` [PATCH 3/4] color-words: expand docs with precise semantics Thomas Rast
2009-01-14 22:26 ` [PATCH 4/4] color-words: make regex configurable via attributes Thomas Rast
2009-01-15 1:33 ` Johannes Schindelin
2009-01-15 1:43 ` Johannes Schindelin
2009-01-14 20:04 ` [PATCH take 3 0/4] color-words improvements Thomas Rast
2009-01-14 21:07 ` Johannes Schindelin
2009-01-14 22:37 ` Thomas Rast
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1232209788-10408-4-git-send-email-trast@student.ethz.ch \
--to=trast@student.ethz.ch \
--cc=bss@iguanasuicide.net \
--cc=git@vger.kernel.org \
--cc=johannes.schindelin@gmx.de \
--cc=junio@pobox.com \
--cc=santi@agolina.net \
--cc=tlikonen@iki.fi \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).