From: Thomas Rast <trast@student.ethz.ch>
To: git@vger.kernel.org
Cc: Junio C Hamano <junio@pobox.com>,
Johannes Schindelin <johannes.schindelin@gmx.de>,
Teemu Likonen <tlikonen@iki.fi>
Subject: [PATCH v3 3/4] word diff: make regex configurable via attributes
Date: Sun, 11 Jan 2009 11:27:13 +0100 [thread overview]
Message-ID: <72242bd75fa8d55c2afc723f8539ef56f2569d3e.1231669012.git.trast@student.ethz.ch> (raw)
In-Reply-To: <529cd830908f018f796dbc46d3b055c1f8ba9c1b.1231669012.git.trast@student.ethz.ch>
In-Reply-To: <7vr63atykr.fsf@gitster.siamese.dyndns.org>
Make the --color-words splitting regular expression configurable via
the diff driver's 'wordregex' configuration variable (diff.*.wordregex).
The user can then assign the driver to a file in .gitattributes. If a
regex is given on the command line, it overrides the driver's setting.
We also provide built-in regexes for some of the languages that
already had funcname patterns. (They are designed to keep each run of
multi-byte UTF-8 sequences together in a single chunk, so that such
characters remain readable.)
Signed-off-by: Thomas Rast <trast@student.ethz.ch>
---
Documentation/diff-options.txt | 4 +++-
Documentation/gitattributes.txt | 21 +++++++++++++++++++++
diff.c | 10 ++++++++++
userdiff.c | 27 +++++++++++++++++++--------
userdiff.h | 1 +
5 files changed, 54 insertions(+), 9 deletions(-)
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 6152d5b..d22c06b 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -96,7 +96,9 @@ endif::git-format-patch[]
By default, a new word only starts at whitespace, so that a
'word' is defined as a maximal sequence of non-whitespace
characters. The optional argument <regex> can be used to
- configure this.
+ configure this. It can also be set via a diff driver, see
+ linkgit:gitattributes[1]; if a <regex> is given explicitly, it
+ overrides any diff driver setting.
+
The <regex> must be an (extended) regular expression. When set, every
non-overlapping match of the <regex> is considered a word. (Regular
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 8af22ec..67f5522 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -334,6 +334,27 @@ patterns are available:
- `tex` suitable for source code for LaTeX documents.
+Customizing word diff
+^^^^^^^^^^^^^^^^^^^^^
+
+You can customize the rules that `git diff --color-words` uses to
+split words in a line, by specifying an appropriate regular expression
+in the "diff.*.wordregex" configuration variable. For example, in TeX
+a backslash followed by a sequence of letters forms a command, but
+several such commands can be run together without intervening
+whitespace. To separate them, use a regular expression such as
+
+------------------------
+[diff "tex"]
+ wordregex = "\\\\[a-zA-Z]+|[{}]|\\\\.|[^\\{} \t]+"
+------------------------
+
+Similar to 'xfuncname', a built-in value is provided for the drivers
+`bibtex`, `html`, `java`, `php`, `python` and `tex`. See the
+documentation of --color-words in linkgit:git-diff[1] for the precise
+semantics.
+
+
Performing text diffs of binary files
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/diff.c b/diff.c
index badaea6..c1cc426 100644
--- a/diff.c
+++ b/diff.c
@@ -1548,6 +1548,12 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe
return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
}
+static const char *userdiff_word_regex(struct diff_filespec *one)
+{
+ diff_filespec_load_driver(one);
+ return one->driver->word_regex;
+}
+
void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
{
if (!options->a_prefix)
@@ -1708,6 +1714,10 @@ static void builtin_diff(const char *name_a,
ecbdata.diff_words =
xcalloc(1, sizeof(struct diff_words_data));
ecbdata.diff_words->file = o->file;
+ if (!o->word_regex)
+ o->word_regex = userdiff_word_regex(one);
+ if (!o->word_regex)
+ o->word_regex = userdiff_word_regex(two);
if (o->word_regex) {
ecbdata.diff_words->word_regex = (regex_t *)
xmalloc(sizeof(regex_t));
diff --git a/userdiff.c b/userdiff.c
index 3681062..7fd9a07 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -6,13 +6,17 @@ static struct userdiff_driver *drivers;
static int ndrivers;
static int drivers_alloc;
-#define FUNCNAME(name, pattern) \
+#define FUNCNAME(name, pattern) \
{ name, NULL, -1, { pattern, REG_EXTENDED } }
+#define PATTERNS(name, pattern, wordregex) \
+ { name, NULL, -1, { pattern, REG_EXTENDED }, NULL, wordregex }
static struct userdiff_driver builtin_drivers[] = {
-FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"),
-FUNCNAME("java",
+PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$",
+ "[^<>= \t]+|\\S"),
+PATTERNS("java",
"!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
- "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"),
+ "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
+ "[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[-+*/]=|\\+\\+|--|\\S|[\x80-\xff]+"),
FUNCNAME("objc",
/* Negate C statements that can look like functions */
"!^[ \t]*(do|for|if|else|return|switch|while)\n"
@@ -27,14 +31,19 @@ FUNCNAME("pascal",
"implementation|initialization|finalization)[ \t]*.*)$"
"\n"
"^(.*=[ \t]*(class|record).*)$"),
-FUNCNAME("php", "^[\t ]*((function|class).*)"),
-FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"),
+PATTERNS("php", "^[\t ]*((function|class).*)",
+ "\\$?[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[-+*/]=|\\+\\+|--|->|\\S|[\x80-\xff]+"),
+PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$",
+ "[a-zA-Z_][a-zA-Z0-9_]*|[-+0-9.e]+|[-+*/]=|//|\\S|[\x80-\xff]+"),
FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"),
-FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"),
-FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"),
+PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+ "[={}\"]|[^={}\" \t]+"),
+PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
+ "\\\\[a-zA-Z@]+|[{}]|\\\\.|[^\\{} \t]+"),
{ "default", NULL, -1, { NULL, 0 } },
};
#undef FUNCNAME
+#undef PATTERNS
static struct userdiff_driver driver_true = {
"diff=true",
@@ -134,6 +143,8 @@ int userdiff_config(const char *k, const char *v)
return parse_string(&drv->external, k, v);
if ((drv = parse_driver(k, v, "textconv")))
return parse_string(&drv->textconv, k, v);
+ if ((drv = parse_driver(k, v, "wordregex")))
+ return parse_string(&drv->word_regex, k, v);
return 0;
}
diff --git a/userdiff.h b/userdiff.h
index ba29457..2aab13e 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -12,6 +12,7 @@ struct userdiff_driver {
int binary;
struct userdiff_funcname funcname;
const char *textconv;
+ const char *word_regex;
};
int userdiff_config(const char *k, const char *v);
--
1.6.1.269.g0769
next prev parent reply other threads:[~2009-01-11 10:28 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-01-09 0:05 [RFC PATCH] make diff --color-words customizable Thomas Rast
2009-01-09 0:25 ` Johannes Schindelin
2009-01-09 0:50 ` Thomas Rast
2009-01-09 11:15 ` Johannes Schindelin
2009-01-09 11:59 ` [ILLUSTRATION PATCH] color-words: take an optional regular expression describing words Johannes Schindelin
2009-01-09 12:24 ` Thomas Rast
2009-01-09 13:05 ` Teemu Likonen
2009-01-10 0:57 ` [PATCH v2] make diff --color-words customizable Thomas Rast
2009-01-10 1:50 ` Jakub Narebski
2009-01-10 11:37 ` Johannes Schindelin
2009-01-10 13:36 ` Jakub Narebski
2009-01-10 14:08 ` Johannes Schindelin
2009-01-12 23:59 ` Jakub Narebski
2009-01-13 0:40 ` Johannes Schindelin
2009-01-10 17:53 ` Davide Libenzi
2009-01-13 0:52 ` Jakub Narebski
2009-01-13 18:50 ` Davide Libenzi
2009-01-10 10:49 ` Johannes Schindelin
2009-01-10 11:25 ` Thomas Rast
2009-01-10 11:45 ` Johannes Schindelin
2009-01-11 1:34 ` Junio C Hamano
2009-01-11 10:27 ` [PATCH v3 0/4] customizable --color-words Thomas Rast
2009-01-11 10:27 ` [PATCH v3 1/4] word diff: comments, preparations for regex customization Thomas Rast
2009-01-11 13:41 ` Johannes Schindelin
2009-01-11 19:49 ` Johannes Schindelin
2009-01-11 22:19 ` Junio C Hamano
2009-01-11 10:27 ` [PATCH v3 2/4] word diff: customizable word splits Thomas Rast
2009-01-11 22:20 ` Junio C Hamano
2009-01-11 10:27 ` Thomas Rast [this message]
2009-01-11 23:20 ` [PATCH v3 3/4] word diff: make regex configurable via attributes Junio C Hamano
2009-01-11 10:27 ` [PATCH v3 4/4] word diff: test customizable word splits Thomas Rast
2009-01-09 9:53 ` [RFC PATCH] make diff --color-words customizable Jeff King
2009-01-09 11:18 ` Johannes Schindelin
2009-01-09 11:22 ` Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=72242bd75fa8d55c2afc723f8539ef56f2569d3e.1231669012.git.trast@student.ethz.ch \
--to=trast@student.ethz.ch \
--cc=git@vger.kernel.org \
--cc=johannes.schindelin@gmx.de \
--cc=junio@pobox.com \
--cc=tlikonen@iki.fi \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).