git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Ping Yin <pkufranky@gmail.com>, git@vger.kernel.org
Subject: Re: [PATCH v2 4/5] Make boundary characters for --color-words configurable
Date: Wed, 07 May 2008 12:13:39 -0700	[thread overview]
Message-ID: <7viqxqc4gs.fsf@gitster.siamese.dyndns.org> (raw)
In-Reply-To: <alpine.DEB.1.00.0805071223450.30431@racer> (Johannes Schindelin's message of "Wed, 7 May 2008 12:24:19 +0100 (BST)")

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> I am rather interested in the semantics, i.e. if you can punch holes into 
> this 3-class approach.

This is not the 3-class thing, but was done as a lunchtime hack.  It
removes more lines than it adds, with real comments ;-).

 * It removes the custom allocator used for minus/plus buffers and
   replaces it with the bog-standard strbuf;

 * The tokenization is done when diff_words_append() is called, i.e. when
   we read the original "added or deleted _lines_";

 * The tokenization function is separated out, and gets the emit_callback,
   so anybody can enhance it with customization using gitattributes and
   other heuristics.  More importantly, it is not byte oriented and would
   be easier to extend it to UTF-8 contents;

 * It does not have to play "suppressed_newline" games anymore.  A LF is
   just a token.

I haven't tested this at all (this is a lunchtime hack) and have a mild
suspicion that it may have corner case miscounting (e.g. I blindly
subtracts 3 from len when dealing with a line that represents a single
token from the internal diff output --- do I always have 3 there even when
the original file ends with an incomplete line?  I didn't check), but
other than that I think this is a lot easier to read and follow.

---

 diff.c |  216 +++++++++++++++++++++++++++++++--------------------------------
 1 files changed, 106 insertions(+), 110 deletions(-)

diff --git a/diff.c b/diff.c
index e35384b..344aaa6 100644
--- a/diff.c
+++ b/diff.c
@@ -351,87 +351,119 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 	return 0;
 }
 
-struct diff_words_buffer {
-	mmfile_t text;
-	long alloc;
-	long current; /* output pointer */
-	int suppressed_newline;
+typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
+
+struct emit_callback {
+	struct xdiff_emit_state xm;
+	int nparents, color_diff;
+	unsigned ws_rule;
+	sane_truncate_fn truncate;
+	const char **label_path;
+	struct diff_words_data *diff_words;
+	int *found_changesp;
+	FILE *file;
 };
 
-static void diff_words_append(char *line, unsigned long len,
-		struct diff_words_buffer *buffer)
+static size_t diff_words_tokenize(struct emit_callback *ecbdata,
+				  char *line, unsigned long len)
 {
-	if (buffer->text.size + len > buffer->alloc) {
-		buffer->alloc = (buffer->text.size + len) * 3 / 2;
-		buffer->text.ptr = xrealloc(buffer->text.ptr, buffer->alloc);
+	/*
+	 * This function currently is deliberately done very stupid,
+	 * but passing ecbdata here means that you can potentially
+	 * implement different tokenization rules depending on
+	 * the content (e.g. "gitattributes(5)").
+	 */
+	int is_space;
+	char *line0 = line;
+
+	if (!len)
+		return 0;
+
+	is_space = isspace(*line);
+	while (len && (isspace(*line) == is_space)) {
+		line++;
+		len--;
 	}
+	return line - line0;
+}
+
+static void diff_words_append(struct emit_callback *ecbdata,
+			      char *line, unsigned long len,
+			      struct strbuf *text)
+{
+	/* Skip leading +/- first. */
 	line++;
 	len--;
-	memcpy(buffer->text.ptr + buffer->text.size, line, len);
-	buffer->text.size += len;
+
+	/*
+	 * Tokenize and stuff the words in.
+	 */
+	while (len) {
+		size_t token_len = diff_words_tokenize(ecbdata, line, len);
+
+		if (line[0] != '\n') {
+			/*
+			 * A nonempty token has ' ' stuffed in front,
+			 * so that we can recover the original
+			 * end-of-line easily.  Stupid, but works.
+			 */
+			strbuf_add(text, " ", 1);
+			strbuf_add(text, line, token_len);
+			strbuf_add(text, "\n", 1);
+			len -= token_len;
+			line += token_len;
+		} else {
+			/* A real LF */
+			strbuf_add(text, "\n", 1);
+			break;
+		}
+	}
 }
 
 struct diff_words_data {
 	struct xdiff_emit_state xm;
-	struct diff_words_buffer minus, plus;
+	struct strbuf minus;
+	struct strbuf plus;
 	FILE *file;
 };
 
-static void print_word(FILE *file, struct diff_words_buffer *buffer, int len, int color,
-		int suppress_newline)
+static void emit_line(FILE *file, const char *set, const char *reset, const char *line, int len)
 {
-	const char *ptr;
-	int eol = 0;
-
-	if (len == 0)
-		return;
-
-	ptr  = buffer->text.ptr + buffer->current;
-	buffer->current += len;
-
-	if (ptr[len - 1] == '\n') {
-		eol = 1;
-		len--;
-	}
-
-	fputs(diff_get_color(1, color), file);
-	fwrite(ptr, len, 1, file);
-	fputs(diff_get_color(1, DIFF_RESET), file);
-
-	if (eol) {
-		if (suppress_newline)
-			buffer->suppressed_newline = 1;
-		else
-			putc('\n', file);
-	}
+	fputs(set, file);
+	fwrite(line, len, 1, file);
+	fputs(reset, file);
 }
 
 static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
 {
 	struct diff_words_data *diff_words = priv;
+	const char *set;
+	const char *reset = diff_colors[DIFF_RESET];
 
-	if (diff_words->minus.suppressed_newline) {
-		if (line[0] != '+')
-			putc('\n', diff_words->file);
-		diff_words->minus.suppressed_newline = 0;
+	switch (line[0]) {
+	case '-':
+		set = diff_colors[DIFF_FILE_OLD];
+		break;
+	case '+':
+		set = diff_colors[DIFF_FILE_NEW];
+		break;
+	case ' ':
+		set = diff_colors[DIFF_PLAIN];
+		break;
+	default:
+		return; /* omit @@ -j,k +l,m @@ header */
 	}
 
-	len--;
-	switch (line[0]) {
-		case '-':
-			print_word(diff_words->file,
-				   &diff_words->minus, len, DIFF_FILE_OLD, 1);
-			break;
-		case '+':
-			print_word(diff_words->file,
-				   &diff_words->plus, len, DIFF_FILE_NEW, 0);
-			break;
-		case ' ':
-			print_word(diff_words->file,
-				   &diff_words->plus, len, DIFF_PLAIN, 0);
-			diff_words->minus.current += len;
-			break;
+	if (line[1] == ' ') {
+		/* A token */
+		line += 2;
+		len -= 3; /* drop the trailing LF */
+	} else {
+		/* A real LF */
+		line++;
+		len--;
 	}
+	emit_line(diff_words->file, set, reset, line, len);
 }
 
 /* this executes the word diff on the accumulated buffers */
@@ -441,27 +473,18 @@ static void diff_words_show(struct diff_words_data *diff_words)
 	xdemitconf_t xecfg;
 	xdemitcb_t ecb;
 	mmfile_t minus, plus;
-	int i;
+	unsigned long sz;
 
 	memset(&xecfg, 0, sizeof(xecfg));
-	minus.size = diff_words->minus.text.size;
-	minus.ptr = xmalloc(minus.size);
-	memcpy(minus.ptr, diff_words->minus.text.ptr, minus.size);
-	for (i = 0; i < minus.size; i++)
-		if (isspace(minus.ptr[i]))
-			minus.ptr[i] = '\n';
-	diff_words->minus.current = 0;
-
-	plus.size = diff_words->plus.text.size;
-	plus.ptr = xmalloc(plus.size);
-	memcpy(plus.ptr, diff_words->plus.text.ptr, plus.size);
-	for (i = 0; i < plus.size; i++)
-		if (isspace(plus.ptr[i]))
-			plus.ptr[i] = '\n';
-	diff_words->plus.current = 0;
+
+	minus.ptr = strbuf_detach(&diff_words->minus, &sz);
+	minus.size = sz;
+	plus.ptr = strbuf_detach(&diff_words->plus, &sz);
+	plus.size = sz;
 
 	xpp.flags = XDF_NEED_MINIMAL;
-	xecfg.ctxlen = diff_words->minus.alloc + diff_words->plus.alloc;
+	/* hack to make it a single hunk to show all */
+	xecfg.ctxlen = minus.size + plus.size;
 	ecb.outf = xdiff_outf;
 	ecb.priv = diff_words;
 	diff_words->xm.consume = fn_out_diff_words_aux;
@@ -469,37 +492,15 @@ static void diff_words_show(struct diff_words_data *diff_words)
 
 	free(minus.ptr);
 	free(plus.ptr);
-	diff_words->minus.text.size = diff_words->plus.text.size = 0;
-
-	if (diff_words->minus.suppressed_newline) {
-		putc('\n', diff_words->file);
-		diff_words->minus.suppressed_newline = 0;
-	}
 }
 
-typedef unsigned long (*sane_truncate_fn)(char *line, unsigned long len);
-
-struct emit_callback {
-	struct xdiff_emit_state xm;
-	int nparents, color_diff;
-	unsigned ws_rule;
-	sane_truncate_fn truncate;
-	const char **label_path;
-	struct diff_words_data *diff_words;
-	int *found_changesp;
-	FILE *file;
-};
-
 static void free_diff_words_data(struct emit_callback *ecbdata)
 {
 	if (ecbdata->diff_words) {
 		/* flush buffers */
-		if (ecbdata->diff_words->minus.text.size ||
-				ecbdata->diff_words->plus.text.size)
+		if (ecbdata->diff_words->minus.len ||
+		    ecbdata->diff_words->plus.len)
 			diff_words_show(ecbdata->diff_words);
-
-		free (ecbdata->diff_words->minus.text.ptr);
-		free (ecbdata->diff_words->plus.text.ptr);
 		free(ecbdata->diff_words);
 		ecbdata->diff_words = NULL;
 	}
@@ -512,13 +513,6 @@ const char *diff_get_color(int diff_use_color, enum color_diff ix)
 	return "";
 }
 
-static void emit_line(FILE *file, const char *set, const char *reset, const char *line, int len)
-{
-	fputs(set, file);
-	fwrite(line, len, 1, file);
-	fputs(reset, file);
-}
-
 static void emit_add_line(const char *reset, struct emit_callback *ecbdata, const char *line, int len)
 {
 	const char *ws = diff_get_color(ecbdata->color_diff, DIFF_WHITESPACE);
@@ -604,16 +598,16 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
 		free_diff_words_data(ecbdata);
 	if (ecbdata->diff_words) {
 		if (line[0] == '-') {
-			diff_words_append(line, len,
+			diff_words_append(ecbdata, line, len,
 					  &ecbdata->diff_words->minus);
 			return;
 		} else if (line[0] == '+') {
-			diff_words_append(line, len,
+			diff_words_append(ecbdata, line, len,
 					  &ecbdata->diff_words->plus);
 			return;
 		}
-		if (ecbdata->diff_words->minus.text.size ||
-		    ecbdata->diff_words->plus.text.size)
+		if (ecbdata->diff_words->minus.len ||
+		    ecbdata->diff_words->plus.len)
 			diff_words_show(ecbdata->diff_words);
 		line++;
 		len--;
@@ -1470,6 +1464,8 @@ static void builtin_diff(const char *name_a,
 		if (DIFF_OPT_TST(o, COLOR_DIFF_WORDS)) {
 			ecbdata.diff_words =
 				xcalloc(1, sizeof(struct diff_words_data));
+			strbuf_init(&ecbdata.diff_words->minus, 0);
+			strbuf_init(&ecbdata.diff_words->plus, 0);
 			ecbdata.diff_words->file = o->file;
 		}
 		xdi_diff(&mf1, &mf2, &xpp, &xecfg, &ecb);

  parent reply	other threads:[~2008-05-07 19:14 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-02  3:39 [PATCH] Make words boundary for --color-words configurable Ping Yin
2008-05-02  3:54 ` Junio C Hamano
2008-05-02  4:28   ` Ping Yin
2008-05-02 13:59     ` [PATCH] Make boundary characters " Ping Yin
2008-05-02 14:26       ` Ping Yin
2008-05-02 14:27         ` Ping Yin
2008-05-03 11:57         ` [PATCH v2 0/5] " Ping Yin
2008-05-03 11:57           ` [PATCH v2 1/5] diff.c: Remove code redundancy in diff_words_show Ping Yin
2008-05-03 11:57             ` [PATCH v2 2/5] diff.c: Use show variable name in fn_out_diff_words_aux Ping Yin
2008-05-03 11:57               ` [PATCH v2 3/5] diff.c: Fix --color-words showing trailing deleted words at another line Ping Yin
2008-05-03 11:57                 ` [PATCH v2 4/5] Make boundary characters for --color-words configurable Ping Yin
2008-05-03 11:57                   ` [PATCH v2 5/5] fn_out_diff_words_aux: Handle common diff line more carefully Ping Yin
2008-05-03 18:18                   ` [PATCH v2 4/5] Make boundary characters for --color-words configurable Junio C Hamano
2008-05-03 18:41                     ` Teemu Likonen
2008-05-04  0:32                     ` Ping Yin
2008-05-04  9:44                       ` Johannes Schindelin
2008-05-04 16:35                         ` Ping Yin
2008-05-04 20:16                           ` Junio C Hamano
2008-05-04 20:47                             ` Jakub Narebski
2008-05-04 21:27                               ` Teemu Likonen
2008-05-05 12:14                               ` Johannes Schindelin
2008-05-05  1:40                             ` Ping Yin
2008-05-05  5:00                               ` Junio C Hamano
2008-05-05 12:10                                 ` Ping Yin
2008-05-06  0:40                                   ` Ping Yin
2008-05-06  8:55                                     ` Johannes Schindelin
2008-05-07  1:15                                       ` Ping Yin
2008-05-07 11:24                                         ` Johannes Schindelin
2008-05-07 12:19                                           ` Ping Yin
2008-05-07 13:10                                             ` Johannes Schindelin
2008-05-07 14:11                                               ` Ping Yin
2008-05-07 19:13                                           ` Junio C Hamano [this message]
2008-05-07 19:33                                             ` Junio C Hamano
2008-05-07 19:45                                             ` Jeff King
2008-05-07 20:02                                               ` Junio C Hamano
2008-05-07 22:04                                                 ` Jeff King
2008-05-08 10:34                                             ` Teemu Likonen
2008-05-10  9:02                                               ` Ping Yin
2008-05-10  9:14                                                 ` Teemu Likonen
2008-05-11 13:16                                                 ` Ping Yin
2008-05-11 13:27                                                   ` Ping Yin
2008-05-11 16:27                                                   ` Junio C Hamano
2008-05-12 16:31                                                     ` Ping Yin
2008-05-12 18:57                                                       ` Jakub Narebski
2008-05-12 19:17                                                         ` Junio C Hamano
2008-05-12 19:57                                                           ` Jakub Narebski
2008-05-13  1:37                                                           ` Ping Yin
2008-05-13  1:42                                                         ` Ping Yin
2008-05-10  8:20                                             ` Ping Yin
2008-05-05 11:51                           ` Johannes Schindelin
2008-05-05 12:02                             ` Ping Yin
2008-05-03 18:01                 ` [PATCH v2 3/5] diff.c: Fix --color-words showing trailing deleted words at another line Junio C Hamano
2008-05-03 12:01               ` [PATCH v2 2/5] diff.c: Use show variable name in fn_out_diff_words_aux Ping Yin
2008-05-03 17:47               ` Junio C Hamano
2008-05-03 18:20             ` [PATCH v2 1/5] diff.c: Remove code redundancy in diff_words_show Junio C Hamano
2008-05-04  4:20           ` [PATCH v3 0/6] --color-words improvement Ping Yin
2008-05-04  4:20             ` [PATCH v3 1/6] diff.c: Remove code redundancy in diff_words_show Ping Yin
2008-05-04  4:20               ` [PATCH v3 2/6] fn_out_diff_words_aux: Use short variable name Ping Yin
2008-05-04  4:20                 ` [PATCH v3 3/6] --color-words: Fix showing trailing deleted words at another line Ping Yin
2008-05-04  4:20                   ` [PATCH v3 4/6] --color-words: Make non-word characters configurable Ping Yin
2008-05-04  4:20                     ` [PATCH v3 5/6] fn_out_diff_words_aux: Handle common diff line more carefully Ping Yin
2008-05-04  4:20                       ` [PATCH v3 6/6] --color-words: Add test t4030 Ping Yin
2008-05-04  9:54                       ` [PATCH v3 5/6] fn_out_diff_words_aux: Handle common diff line more carefully Johannes Schindelin
2008-05-04 16:53                         ` Ping Yin
2008-05-05 12:11                           ` Johannes Schindelin
2008-05-05 14:18                             ` Ping Yin
2008-05-04  6:45                     ` [PATCH v3 4/6] --color-words: Make non-word characters configurable Junio C Hamano
2008-05-04  7:04                       ` Ping Yin
2008-05-04  9:52                   ` [PATCH v3 3/6] --color-words: Fix showing trailing deleted words at another line Johannes Schindelin
2008-05-04 16:48                     ` Ping Yin
2008-05-05 12:10                       ` Johannes Schindelin
2008-05-04  9:47                 ` [PATCH v3 2/6] fn_out_diff_words_aux: Use short variable name Johannes Schindelin
2008-05-04 16:39                   ` Ping Yin
2008-05-05 12:05                     ` Johannes Schindelin
2008-05-04  9:46               ` [PATCH v3 1/6] diff.c: Remove code redundancy in diff_words_show Johannes Schindelin
2008-05-02 14:36       ` [PATCH] Make boundary characters for --color-words configurable Teemu Likonen
2008-05-03  0:22         ` Ping Yin
2008-05-03 13:22           ` Dirk Süsserott
2008-05-03 13:57             ` Ping Yin
2008-05-03 14:03       ` [PATCH] --color-words: Make the word characters configurable Johannes Schindelin
2008-05-03 14:13         ` Ping Yin
2008-05-03 14:23           ` Johannes Schindelin
2008-05-03 14:43         ` Teemu Likonen
2008-05-04  9:18           ` Johannes Schindelin
2008-05-03 17:43         ` Junio C Hamano
2008-05-04  9:25           ` Johannes Schindelin
2008-05-02  7:45 ` [PATCH] Make words boundary for --color-words configurable Johannes Schindelin
2008-05-02  8:14   ` Teemu Likonen
2008-05-02  9:23     ` Ping Yin
2008-05-02 10:01     ` Teemu Likonen
2008-05-02  9:28   ` Ping Yin
2008-05-03  0:18   ` Jakub Narebski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7viqxqc4gs.fsf@gitster.siamese.dyndns.org \
    --to=gitster@pobox.com \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=pkufranky@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).