From: Junio C Hamano <gitster@pobox.com>
To: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Ping Yin <pkufranky@gmail.com>, git@vger.kernel.org
Subject: Re: [PATCH v2 4/5] Make boundary characters for --color-words configurable
Date: Wed, 07 May 2008 12:13:39 -0700 [thread overview]
Message-ID: <7viqxqc4gs.fsf@gitster.siamese.dyndns.org> (raw)
In-Reply-To: <alpine.DEB.1.00.0805071223450.30431@racer> (Johannes Schindelin's message of "Wed, 7 May 2008 12:24:19 +0100 (BST)")
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> I am rather interested in the semantics, i.e. if you can punch holes into
> this 3-class approach.
This is not the 3-class thing, but was done as a lunchtime hack. It
removes more lines than it adds, with real comments ;-).
* It removes the custom allocator used for minus/plus buffers and
replaces it with the bog-standard strbuf;
* The tokenization is done when diff_words_append() is called, i.e. when
we read the original "added or deleted _lines_";
* The tokenization function is separated out, and gets the emit_callback,
so anybody can enhance it with customization using gitattributes and
other heuristics. More importantly, it is not byte oriented and would
be easier to extend it to UTF-8 contents;
* It does not have to play "suppressed_newline" games anymore. A LF is
just a token.
I haven't tested this at all (this is a lunchtime hack) and have a mild
suspicion that it may have corner case miscounting (e.g. I blindly
subtracts 3 from len when dealing with a line that represents a single
token from the internal diff output --- do I always have 3 there even when
the original file ends with an incomplete line? I didn't check), but
other than that I think this is a lot easier to read and follow.
---
diff.c | 216 +++++++++++++++++++++++++++++++--------------------------------
1 files changed, 106 insertions(+), 110 deletions(-)
diff --git a/diff.c b/diff.c
index e35384b..344aaa6 100644
--- a/diff.c
+++ b/diff.c
@@ -351,87 +351,119 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
return 0;
}
-struct diff_words_buffer {
- mmfile_t text;
- long alloc;
- long current; /* output pointer */
- int suppressed_newline;
+typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
+
+struct emit_callback {
+ struct xdiff_emit_state xm;
+ int nparents, color_diff;
+ unsigned ws_rule;
+ sane_truncate_fn truncate;
+ const char **label_path;
+ struct diff_words_data *diff_words;
+ int *found_changesp;
+ FILE *file;
};
-static void diff_words_append(char *line, unsigned long len,
- struct diff_words_buffer *buffer)
+static size_t diff_words_tokenize(struct emit_callback *ecbdata,
+ char *line, unsigned long len)
{
- if (buffer->text.size + len > buffer->alloc) {
- buffer->alloc = (buffer->text.size + len) * 3 / 2;
- buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc);
+ /*
+ * This function currently is deliberately done very stupid,
+ * but passing ecbdata here means that you can potentially
+ * implement different tokenization rules depending on
+ * the content (e.g. "gitattributes(5)").
+ */
+ int is_space;
+ char *line0 = line;
+
+ if (!len)
+ return 0;
+
+ is_space = isspace(*line);
+ while (len && (isspace(*line) == is_space)) {
+ line++;
+ len--;
}
+ return line - line0;
+}
+
+static void diff_words_append(struct emit_callback *ecbdata,
+ char *line, unsigned long len,
+ struct strbuf *text)
+{
+ /* Skip leading +/- first. */
line++;
len--;
- memcpy(buffer->text.ptr + buffer->text.size, line, len);
- buffer->text.size += len;
+
+ /*
+ * Tokenize and stuff the words in.
+ */
+ while (len) {
+ size_t token_len = diff_words_tokenize(ecbdata, line, len);
+
+ if (line[0] != '\n') {
+ /*
+ * A nonempty token has ' ' stuffed in front,
+ * so that we can recover the original
+ * end-of-line easily. Stupid, but works.
+ */
+ strbuf_add(text, " ", 1);
+ strbuf_add(text, line, token_len);
+ strbuf_add(text, "\n", 1);
+ len -= token_len;
+ line += token_len;
+ } else {
+ /* A real LF */
+ strbuf_add(text, "\n", 1);
+ break;
+ }
+ }
}
struct diff_words_data {
struct xdiff_emit_state xm;
- struct diff_words_buffer minus, plus;
+ struct strbuf minus;
+ struct strbuf plus;
FILE *file;
};
-static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
- int suppress_newline)
+static void emit_line(FILE *file, const char *set, const char *reset, const char *line, int len)
{
- const char *ptr;
- int eol = 0;
-
- if (len == 0)
- return;
-
- ptr = buffer->text.ptr + buffer->current;
- buffer->current += len;
-
- if (ptr[len - 1] == '\n') {
- eol = 1;
- len--;
- }
-
- fputs(diff_get_color(1, color), file);
- fwrite(ptr, len, 1, file);
- fputs(diff_get_color(1, DIFF_RESET), file);
-
- if (eol) {
- if (suppress_newline)
- buffer->suppressed_newline = 1;
- else
- putc('\n', file);
- }
+ fputs(set, file);
+ fwrite(line, len, 1, file);
+ fputs(reset, file);
}
static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
{
struct diff_words_data *diff_words = priv;
+ const char *set;
+ const char *reset = diff_colors[DIFF_RESET];
- if (diff_words->minus.suppressed_newline) {
- if (line[0] != '+')
- putc('\n', diff_words->file);
- diff_words->minus.suppressed_newline = 0;
+ switch (line[0]) {
+ case '-':
+ set = diff_colors[DIFF_FILE_OLD];
+ break;
+ case '+':
+ set = diff_colors[DIFF_FILE_NEW];
+ break;
+ case ' ':
+ set = diff_colors[DIFF_PLAIN];
+ break;
+ default:
+ return; /* omit @@ -j,k +l,m @@ header */
}
- len--;
- switch (line[0]) {
- case '-':
- print_word(diff_words->file,
- &diff_words->minus, len, DIFF_FILE_OLD, 1);
- break;
- case '+':
- print_word(diff_words->file,
- &diff_words->plus, len, DIFF_FILE_NEW, 0);
- break;
- case ' ':
- print_word(diff_words->file,
- &diff_words->plus, len, DIFF_PLAIN, 0);
- diff_words->minus.current += len;
- break;
+ if (line[1] == ' ') {
+ /* A token */
+ line += 2;
+ len -= 3; /* drop the trailing LF */
+ } else {
+ /* A real LF */
+ line++;
+ len--;
}
+ emit_line(diff_words->file, set, reset, line, len);
}
/* this executes the word diff on the accumulated buffers */
@@ -441,27 +473,18 @@ static void diff_words_show(struct diff_words_data *diff_words)
xdemitconf_t xecfg;
xdemitcb_t ecb;
mmfile_t minus, plus;
- int i;
+ unsigned long sz;
memset(&xecfg, 0, sizeof(xecfg));
- minus.size = diff_words->minus.text.size;
- minus.ptr = xmalloc(minus.size);
- memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
- for (i = 0; i < minus.size; i++)
- if (isspace(minus.ptr[i]))
- minus.ptr[i] = '\n';
- diff_words->minus.current = 0;
-
- plus.size = diff_words->plus.text.size;
- plus.ptr = xmalloc(plus.size);
- memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
- for (i = 0; i < plus.size; i++)
- if (isspace(plus.ptr[i]))
- plus.ptr[i] = '\n';
- diff_words->plus.current = 0;
+
+ minus.ptr = strbuf_detach(&diff_words->minus, &sz);
+ minus.size = sz;
+ plus.ptr = strbuf_detach(&diff_words->plus, &sz);
+ plus.size = sz;
xpp.flags = XDF_NEED_MINIMAL;
- xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
+ /* hack to make it a single hunk to show all */
+ xecfg.ctxlen = minus.size + plus.size;
ecb.outf = xdiff_outf;
ecb.priv = diff_words;
diff_words->xm.consume = fn_out_diff_words_aux;
@@ -469,37 +492,15 @@ static void diff_words_show(struct diff_words_data *diff_words)
free(minus.ptr);
free(plus.ptr);
- diff_words->minus.text.size = diff_words->plus.text.size = 0;
-
- if (diff_words->minus.suppressed_newline) {
- putc('\n', diff_words->file);
- diff_words->minus.suppressed_newline = 0;
- }
}
-typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
-
-struct emit_callback {
- struct xdiff_emit_state xm;
- int nparents, color_diff;
- unsigned ws_rule;
- sane_truncate_fn truncate;
- const char **label_path;
- struct diff_words_data *diff_words;
- int *found_changesp;
- FILE *file;
-};
-
static void free_diff_words_data(struct emit_callback *ecbdata)
{
if (ecbdata->diff_words) {
/* flush buffers */
- if (ecbdata->diff_words->minus.text.size ||
- ecbdata->diff_words->plus.text.size)
+ if (ecbdata->diff_words->minus.len ||
+ ecbdata->diff_words->plus.len)
diff_words_show(ecbdata->diff_words);
-
- free (ecbdata->diff_words->minus.text.ptr);
- free (ecbdata->diff_words->plus.text.ptr);
free(ecbdata->diff_words);
ecbdata->diff_words = NULL;
}
@@ -512,13 +513,6 @@ const char *diff_get_color(int diff_use_color, enum color_diff ix)
return "";
}
-static void emit_line(FILE *file, const char *set, const char *reset, const char *line, int len)
-{
- fputs(set, file);
- fwrite(line, len, 1, file);
- fputs(reset, file);
-}
-
static void emit_add_line(const char *reset, struct emit_callback *ecbdata, const char *line, int len)
{
const char *ws = diff_get_color(ecbdata->color_diff, DIFF_WHITESPACE);
@@ -604,16 +598,16 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
free_diff_words_data(ecbdata);
if (ecbdata->diff_words) {
if (line[0] == '-') {
- diff_words_append(line, len,
+ diff_words_append(ecbdata, line, len,
&ecbdata->diff_words->minus);
return;
} else if (line[0] == '+') {
- diff_words_append(line, len,
+ diff_words_append(ecbdata, line, len,
&ecbdata->diff_words->plus);
return;
}
- if (ecbdata->diff_words->minus.text.size ||
- ecbdata->diff_words->plus.text.size)
+ if (ecbdata->diff_words->minus.len ||
+ ecbdata->diff_words->plus.len)
diff_words_show(ecbdata->diff_words);
line++;
len--;
@@ -1470,6 +1464,8 @@ static void builtin_diff(const char *name_a,
if (DIFF_OPT_TST(o, COLOR_DIFF_WORDS)) {
ecbdata.diff_words =
xcalloc(1, sizeof(struct diff_words_data));
+ strbuf_init(&ecbdata.diff_words->minus, 0);
+ strbuf_init(&ecbdata.diff_words->plus, 0);
ecbdata.diff_words->file = o->file;
}
xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb);
next prev parent reply other threads:[~2008-05-07 19:14 UTC|newest]
Thread overview: 92+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-05-02 3:39 [PATCH] Make words boundary for --color-words configurable Ping Yin
2008-05-02 3:54 ` Junio C Hamano
2008-05-02 4:28 ` Ping Yin
2008-05-02 13:59 ` [PATCH] Make boundary characters " Ping Yin
2008-05-02 14:26 ` Ping Yin
2008-05-02 14:27 ` Ping Yin
2008-05-03 11:57 ` [PATCH v2 0/5] " Ping Yin
2008-05-03 11:57 ` [PATCH v2 1/5] diff.c: Remove code redundancy in diff_words_show Ping Yin
2008-05-03 11:57 ` [PATCH v2 2/5] diff.c: Use show variable name in fn_out_diff_words_aux Ping Yin
2008-05-03 11:57 ` [PATCH v2 3/5] diff.c: Fix --color-words showing trailing deleted words at another line Ping Yin
2008-05-03 11:57 ` [PATCH v2 4/5] Make boundary characters for --color-words configurable Ping Yin
2008-05-03 11:57 ` [PATCH v2 5/5] fn_out_diff_words_aux: Handle common diff line more carefully Ping Yin
2008-05-03 18:18 ` [PATCH v2 4/5] Make boundary characters for --color-words configurable Junio C Hamano
2008-05-03 18:41 ` Teemu Likonen
2008-05-04 0:32 ` Ping Yin
2008-05-04 9:44 ` Johannes Schindelin
2008-05-04 16:35 ` Ping Yin
2008-05-04 20:16 ` Junio C Hamano
2008-05-04 20:47 ` Jakub Narebski
2008-05-04 21:27 ` Teemu Likonen
2008-05-05 12:14 ` Johannes Schindelin
2008-05-05 1:40 ` Ping Yin
2008-05-05 5:00 ` Junio C Hamano
2008-05-05 12:10 ` Ping Yin
2008-05-06 0:40 ` Ping Yin
2008-05-06 8:55 ` Johannes Schindelin
2008-05-07 1:15 ` Ping Yin
2008-05-07 11:24 ` Johannes Schindelin
2008-05-07 12:19 ` Ping Yin
2008-05-07 13:10 ` Johannes Schindelin
2008-05-07 14:11 ` Ping Yin
2008-05-07 19:13 ` Junio C Hamano [this message]
2008-05-07 19:33 ` Junio C Hamano
2008-05-07 19:45 ` Jeff King
2008-05-07 20:02 ` Junio C Hamano
2008-05-07 22:04 ` Jeff King
2008-05-08 10:34 ` Teemu Likonen
2008-05-10 9:02 ` Ping Yin
2008-05-10 9:14 ` Teemu Likonen
2008-05-11 13:16 ` Ping Yin
2008-05-11 13:27 ` Ping Yin
2008-05-11 16:27 ` Junio C Hamano
2008-05-12 16:31 ` Ping Yin
2008-05-12 18:57 ` Jakub Narebski
2008-05-12 19:17 ` Junio C Hamano
2008-05-12 19:57 ` Jakub Narebski
2008-05-13 1:37 ` Ping Yin
2008-05-13 1:42 ` Ping Yin
2008-05-10 8:20 ` Ping Yin
2008-05-05 11:51 ` Johannes Schindelin
2008-05-05 12:02 ` Ping Yin
2008-05-03 18:01 ` [PATCH v2 3/5] diff.c: Fix --color-words showing trailing deleted words at another line Junio C Hamano
2008-05-03 12:01 ` [PATCH v2 2/5] diff.c: Use show variable name in fn_out_diff_words_aux Ping Yin
2008-05-03 17:47 ` Junio C Hamano
2008-05-03 18:20 ` [PATCH v2 1/5] diff.c: Remove code redundancy in diff_words_show Junio C Hamano
2008-05-04 4:20 ` [PATCH v3 0/6] --color-words improvement Ping Yin
2008-05-04 4:20 ` [PATCH v3 1/6] diff.c: Remove code redundancy in diff_words_show Ping Yin
2008-05-04 4:20 ` [PATCH v3 2/6] fn_out_diff_words_aux: Use short variable name Ping Yin
2008-05-04 4:20 ` [PATCH v3 3/6] --color-words: Fix showing trailing deleted words at another line Ping Yin
2008-05-04 4:20 ` [PATCH v3 4/6] --color-words: Make non-word characters configurable Ping Yin
2008-05-04 4:20 ` [PATCH v3 5/6] fn_out_diff_words_aux: Handle common diff line more carefully Ping Yin
2008-05-04 4:20 ` [PATCH v3 6/6] --color-words: Add test t4030 Ping Yin
2008-05-04 9:54 ` [PATCH v3 5/6] fn_out_diff_words_aux: Handle common diff line more carefully Johannes Schindelin
2008-05-04 16:53 ` Ping Yin
2008-05-05 12:11 ` Johannes Schindelin
2008-05-05 14:18 ` Ping Yin
2008-05-04 6:45 ` [PATCH v3 4/6] --color-words: Make non-word characters configurable Junio C Hamano
2008-05-04 7:04 ` Ping Yin
2008-05-04 9:52 ` [PATCH v3 3/6] --color-words: Fix showing trailing deleted words at another line Johannes Schindelin
2008-05-04 16:48 ` Ping Yin
2008-05-05 12:10 ` Johannes Schindelin
2008-05-04 9:47 ` [PATCH v3 2/6] fn_out_diff_words_aux: Use short variable name Johannes Schindelin
2008-05-04 16:39 ` Ping Yin
2008-05-05 12:05 ` Johannes Schindelin
2008-05-04 9:46 ` [PATCH v3 1/6] diff.c: Remove code redundancy in diff_words_show Johannes Schindelin
2008-05-02 14:36 ` [PATCH] Make boundary characters for --color-words configurable Teemu Likonen
2008-05-03 0:22 ` Ping Yin
2008-05-03 13:22 ` Dirk Süsserott
2008-05-03 13:57 ` Ping Yin
2008-05-03 14:03 ` [PATCH] --color-words: Make the word characters configurable Johannes Schindelin
2008-05-03 14:13 ` Ping Yin
2008-05-03 14:23 ` Johannes Schindelin
2008-05-03 14:43 ` Teemu Likonen
2008-05-04 9:18 ` Johannes Schindelin
2008-05-03 17:43 ` Junio C Hamano
2008-05-04 9:25 ` Johannes Schindelin
2008-05-02 7:45 ` [PATCH] Make words boundary for --color-words configurable Johannes Schindelin
2008-05-02 8:14 ` Teemu Likonen
2008-05-02 9:23 ` Ping Yin
2008-05-02 10:01 ` Teemu Likonen
2008-05-02 9:28 ` Ping Yin
2008-05-03 0:18 ` Jakub Narebski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7viqxqc4gs.fsf@gitster.siamese.dyndns.org \
--to=gitster@pobox.com \
--cc=Johannes.Schindelin@gmx.de \
--cc=git@vger.kernel.org \
--cc=pkufranky@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).