* [PATCH v2 1/4] diff.c: return pattern entry pointer rather than just the hunk header pattern
[not found] <7vskry1485.fsf@gitster.siamese.dyndns.org>
@ 2008-09-18 22:40 ` Brandon Casey
2008-09-19 18:14 ` Boyd Lynn Gerber
2008-09-18 22:42 ` [PATCH v2 2/4] diff.c: associate a flag with each pattern and use it for compiling regex Brandon Casey
` (2 subsequent siblings)
3 siblings, 1 reply; 26+ messages in thread
From: Brandon Casey @ 2008-09-18 22:40 UTC (permalink / raw)
To: Junio C Hamano
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
This is in preparation for associating a flag with each pattern which will
control how the pattern is interpreted. For example, as a basic or extended
regular expression.
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
---
Junio C Hamano wrote:
> Brandon Casey <casey@nrlssc.navy.mil> writes:
>
>> diff --git a/diff.c b/diff.c
>> index 998dcaa..e040088 100644
>> --- a/diff.c
>> +++ b/diff.c
>> @@ -94,6 +94,8 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val
>> * 'diff.<what>.funcname' attribute can be specified in the configuration
>> * to define a customized regexp to find the beginning of a function to
>> * be used for hunk header lines of "diff -p" style output.
>> + * Note: If this structure is modified, it must retain the ability to be cast
>> + * to a struct funcname_pattern_entry, defined elsewhere.
>> */
>
> Yuck. Why not do:
>
> struct funcname_pattern_entry {
> /* whatever fields one entry needs */
> };
> static struct funcname_pattern_list {
> struct funcname_pattern_list *next;
> struct funcname_pattern_entry e;
> } *funcname_pattern_list;
<...>
> Then you do not have to worry about casting things up and down, right?
diff.c | 55 ++++++++++++++++++++++++++++---------------------------
1 files changed, 28 insertions(+), 27 deletions(-)
diff --git a/diff.c b/diff.c
index 998dcaa..3928124 100644
--- a/diff.c
+++ b/diff.c
@@ -95,32 +95,35 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val
* to define a customized regexp to find the beginning of a function to
* be used for hunk header lines of "diff -p" style output.
*/
-static struct funcname_pattern {
+struct funcname_pattern_entry {
char *name;
char *pattern;
- struct funcname_pattern *next;
+};
+static struct funcname_pattern_list {
+ struct funcname_pattern_list *next;
+ struct funcname_pattern_entry e;
} *funcname_pattern_list;
static int parse_funcname_pattern(const char *var, const char *ep, const char *value)
{
const char *name;
int namelen;
- struct funcname_pattern *pp;
+ struct funcname_pattern_list *pp;
name = var + 5; /* "diff." */
namelen = ep - name;
for (pp = funcname_pattern_list; pp; pp = pp->next)
- if (!strncmp(pp->name, name, namelen) && !pp->name[namelen])
+ if (!strncmp(pp->e.name, name, namelen) && !pp->e.name[namelen])
break;
if (!pp) {
pp = xcalloc(1, sizeof(*pp));
- pp->name = xmemdupz(name, namelen);
+ pp->e.name = xmemdupz(name, namelen);
pp->next = funcname_pattern_list;
funcname_pattern_list = pp;
}
- free(pp->pattern);
- pp->pattern = xstrdup(value);
+ free(pp->e.pattern);
+ pp->e.pattern = xstrdup(value);
return 0;
}
@@ -1382,20 +1385,17 @@ int diff_filespec_is_binary(struct diff_filespec *one)
return one->is_binary;
}
-static const char *funcname_pattern(const char *ident)
+static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
{
- struct funcname_pattern *pp;
+ struct funcname_pattern_list *pp;
for (pp = funcname_pattern_list; pp; pp = pp->next)
- if (!strcmp(ident, pp->name))
- return pp->pattern;
+ if (!strcmp(ident, pp->e.name))
+ return &pp->e;
return NULL;
}
-static struct builtin_funcname_pattern {
- const char *name;
- const char *pattern;
-} builtin_funcname_pattern[] = {
+static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
{ "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$" },
{ "html", "^\\s*\\(<[Hh][1-6]\\s.*>.*\\)$" },
{ "java", "!^[ ]*\\(catch\\|do\\|for\\|if\\|instanceof\\|"
@@ -1415,9 +1415,10 @@ static struct builtin_funcname_pattern {
{ "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$" },
};
-static const char *diff_funcname_pattern(struct diff_filespec *one)
+static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)
{
- const char *ident, *pattern;
+ const char *ident;
+ const struct funcname_pattern_entry *pe;
int i;
diff_filespec_check_attr(one);
@@ -1432,9 +1433,9 @@ static const char *diff_funcname_pattern(struct diff_filespec *one)
return funcname_pattern("default");
/* Look up custom "funcname.$ident" regexp from config. */
- pattern = funcname_pattern(ident);
- if (pattern)
- return pattern;
+ pe = funcname_pattern(ident);
+ if (pe)
+ return pe;
/*
* And define built-in fallback patterns here. Note that
@@ -1442,7 +1443,7 @@ static const char *diff_funcname_pattern(struct diff_filespec *one)
*/
for (i = 0; i < ARRAY_SIZE(builtin_funcname_pattern); i++)
if (!strcmp(ident, builtin_funcname_pattern[i].name))
- return builtin_funcname_pattern[i].pattern;
+ return &builtin_funcname_pattern[i];
return NULL;
}
@@ -1520,11 +1521,11 @@ static void builtin_diff(const char *name_a,
xdemitconf_t xecfg;
xdemitcb_t ecb;
struct emit_callback ecbdata;
- const char *funcname_pattern;
+ const struct funcname_pattern_entry *pe;
- funcname_pattern = diff_funcname_pattern(one);
- if (!funcname_pattern)
- funcname_pattern = diff_funcname_pattern(two);
+ pe = diff_funcname_pattern(one);
+ if (!pe)
+ pe = diff_funcname_pattern(two);
memset(&xecfg, 0, sizeof(xecfg));
memset(&ecbdata, 0, sizeof(ecbdata));
@@ -1536,8 +1537,8 @@ static void builtin_diff(const char *name_a,
xpp.flags = XDF_NEED_MINIMAL | o->xdl_opts;
xecfg.ctxlen = o->context;
xecfg.flags = XDL_EMIT_FUNCNAMES;
- if (funcname_pattern)
- xdiff_set_find_func(&xecfg, funcname_pattern);
+ if (pe)
+ xdiff_set_find_func(&xecfg, pe->pattern);
if (!diffopts)
;
else if (!prefixcmp(diffopts, "--unified="))
--
1.6.0.1.244.gdc19
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH v2 1/4] diff.c: return pattern entry pointer rather than just the hunk header pattern
2008-09-18 22:40 ` [PATCH v2 1/4] diff.c: return pattern entry pointer rather than just the hunk header pattern Brandon Casey
@ 2008-09-19 18:14 ` Boyd Lynn Gerber
2008-09-19 19:11 ` Brandon Casey
0 siblings, 1 reply; 26+ messages in thread
From: Boyd Lynn Gerber @ 2008-09-19 18:14 UTC (permalink / raw)
To: Brandon Casey
Cc: Junio C Hamano, Arjen Laarhoven, Mike Ralphson, Johannes Sixt,
Jeff King, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
So on these patches,
What do you want me to do? I applied them and things kinda worked. The
problem is pine/alpine messes them up a bit and it is not easy to manually
fix them. It would be easier to git clone/pull them from either a site or
the trees. I do think that on some we should use the actual charact vers
the C-syntax. "\t" for example.
--
Boyd Gerber <gerberb@zenez.com>
ZENEZ 1042 East Fort Union #135, Midvale Utah 84047
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 1/4] diff.c: return pattern entry pointer rather than just the hunk header pattern
2008-09-19 18:14 ` Boyd Lynn Gerber
@ 2008-09-19 19:11 ` Brandon Casey
0 siblings, 0 replies; 26+ messages in thread
From: Brandon Casey @ 2008-09-19 19:11 UTC (permalink / raw)
To: Boyd Lynn Gerber
Cc: Junio C Hamano, Arjen Laarhoven, Mike Ralphson, Johannes Sixt,
Jeff King, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Boyd Lynn Gerber wrote:
> So on these patches,
> What do you want me to do? I applied them and things kinda worked.
Test t4018-diff-funcname.sh should pass. As Johan mentioned, in the
last patch '\\s' should have been converted to '[ \t]', rather than
to just ' ', but that should not affect the test and should only
affect html, python, and ruby patterns.
It would be nice if the tests were expanded.
> The
> problem is pine/alpine messes them up a bit and it is not easy to
> manually fix them. It would be easier to git clone/pull them from
> either a site or the trees.
I expect they will be in Junio's tree soon, most likely the master
branch.
>I do think that on some we should use the
> actual charact vers the C-syntax. "\t" for example.
"\t" is safe to use since it is interpreted by git in config.c: parse_value(),
not by the regex library. I was also concerned about that character until I
traced the code to parse_value(). Junio is right that better documentation of
this feature is needed.
-brandon
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH v2 2/4] diff.c: associate a flag with each pattern and use it for compiling regex
[not found] <7vskry1485.fsf@gitster.siamese.dyndns.org>
2008-09-18 22:40 ` [PATCH v2 1/4] diff.c: return pattern entry pointer rather than just the hunk header pattern Brandon Casey
@ 2008-09-18 22:42 ` Brandon Casey
2008-09-18 22:44 ` [PATCH v2 3/4] diff.*.xfuncname which uses "extended" regex's for hunk header selection Brandon Casey
2008-09-18 22:47 ` [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax Brandon Casey
3 siblings, 0 replies; 26+ messages in thread
From: Brandon Casey @ 2008-09-18 22:42 UTC (permalink / raw)
To: Junio C Hamano
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
This is in preparation for allowing extended regular expression patterns.
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
---
diff.c | 27 +++++++++++++++------------
xdiff-interface.c | 4 ++--
xdiff-interface.h | 2 +-
3 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/diff.c b/diff.c
index 3928124..08cdd8f 100644
--- a/diff.c
+++ b/diff.c
@@ -98,13 +98,14 @@ static int parse_lldiff_command(const char *var, const char *ep, const char *val
struct funcname_pattern_entry {
char *name;
char *pattern;
+ int cflags;
};
static struct funcname_pattern_list {
struct funcname_pattern_list *next;
struct funcname_pattern_entry e;
} *funcname_pattern_list;
-static int parse_funcname_pattern(const char *var, const char *ep, const char *value)
+static int parse_funcname_pattern(const char *var, const char *ep, const char *value, int cflags)
{
const char *name;
int namelen;
@@ -124,6 +125,7 @@ static int parse_funcname_pattern(const char *var, const char *ep, const char *v
}
free(pp->e.pattern);
pp->e.pattern = xstrdup(value);
+ pp->e.cflags = cflags;
return 0;
}
@@ -192,7 +194,8 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
if (!strcmp(ep, ".funcname")) {
if (!value)
return config_error_nonbool(var);
- return parse_funcname_pattern(var, ep, value);
+ return parse_funcname_pattern(var, ep, value,
+ 0);
}
}
}
@@ -1396,23 +1399,23 @@ static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
}
static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
- { "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$" },
- { "html", "^\\s*\\(<[Hh][1-6]\\s.*>.*\\)$" },
+ { "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$", 0 },
+ { "html", "^\\s*\\(<[Hh][1-6]\\s.*>.*\\)$", 0 },
{ "java", "!^[ ]*\\(catch\\|do\\|for\\|if\\|instanceof\\|"
"new\\|return\\|switch\\|throw\\|while\\)\n"
"^[ ]*\\(\\([ ]*"
"[A-Za-z_][A-Za-z_0-9]*\\)\\{2,\\}"
- "[ ]*([^;]*\\)$" },
+ "[ ]*([^;]*\\)$", 0 },
{ "pascal", "^\\(\\(procedure\\|function\\|constructor\\|"
"destructor\\|interface\\|implementation\\|"
"initialization\\|finalization\\)[ \t]*.*\\)$"
"\\|"
- "^\\(.*=[ \t]*\\(class\\|record\\).*\\)$"
- },
- { "php", "^[\t ]*\\(\\(function\\|class\\).*\\)" },
- { "python", "^\\s*\\(\\(class\\|def\\)\\s.*\\)$" },
- { "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$" },
- { "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$" },
+ "^\\(.*=[ \t]*\\(class\\|record\\).*\\)$",
+ 0 },
+ { "php", "^[\t ]*\\(\\(function\\|class\\).*\\)", 0 },
+ { "python", "^\\s*\\(\\(class\\|def\\)\\s.*\\)$", 0 },
+ { "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$", 0 },
+ { "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$", 0 },
};
static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)
@@ -1538,7 +1541,7 @@ static void builtin_diff(const char *name_a,
xecfg.ctxlen = o->context;
xecfg.flags = XDL_EMIT_FUNCNAMES;
if (pe)
- xdiff_set_find_func(&xecfg, pe->pattern);
+ xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags);
if (!diffopts)
;
else if (!prefixcmp(diffopts, "--unified="))
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 944ad98..7f1a7d3 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -218,7 +218,7 @@ static long ff_regexp(const char *line, long len,
return result;
}
-void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value)
+void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value, int cflags)
{
int i;
struct ff_regs *regs;
@@ -243,7 +243,7 @@ void xdiff_set_find_func(xdemitconf_t *xecfg, const char *value)
expression = buffer = xstrndup(value, ep - value);
else
expression = value;
- if (regcomp(®->re, expression, 0))
+ if (regcomp(®->re, expression, cflags))
die("Invalid regexp to look for hunk header: %s", expression);
free(buffer);
value = ep + 1;
diff --git a/xdiff-interface.h b/xdiff-interface.h
index 558492b..23c49b9 100644
--- a/xdiff-interface.h
+++ b/xdiff-interface.h
@@ -16,6 +16,6 @@ int parse_hunk_header(char *line, int len,
int read_mmfile(mmfile_t *ptr, const char *filename);
int buffer_is_binary(const char *ptr, unsigned long size);
-extern void xdiff_set_find_func(xdemitconf_t *xecfg, const char *line);
+extern void xdiff_set_find_func(xdemitconf_t *xecfg, const char *line, int cflags);
#endif
--
1.6.0.1.244.gdc19
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH v2 3/4] diff.*.xfuncname which uses "extended" regex's for hunk header selection
[not found] <7vskry1485.fsf@gitster.siamese.dyndns.org>
2008-09-18 22:40 ` [PATCH v2 1/4] diff.c: return pattern entry pointer rather than just the hunk header pattern Brandon Casey
2008-09-18 22:42 ` [PATCH v2 2/4] diff.c: associate a flag with each pattern and use it for compiling regex Brandon Casey
@ 2008-09-18 22:44 ` Brandon Casey
2008-09-18 22:47 ` [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax Brandon Casey
3 siblings, 0 replies; 26+ messages in thread
From: Brandon Casey @ 2008-09-18 22:44 UTC (permalink / raw)
To: Junio C Hamano
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Currently, the hunk headers produced by 'diff -p' are customizable by
setting the diff.*.funcname option in the config file. The 'funcname' option
takes a basic regular expression. This functionality was designed using the
GNU regex library which, by default, allows using backslashed versions of
some extended regular expression operators, even in Basic Regular Expression
mode. For example, the following characters, when backslashed, are
interpreted according to the extended regular expression rules: ?, +, and |.
As such, the builtin funcname patterns were created using some extended
regular expression operators.
Other platforms which adhere more strictly to the POSIX spec do not
interpret the backslashed extended RE operators in Basic Regular Expression
mode. This causes the pattern matching for the builtin funcname patterns to
fail on those platforms.
Introduce a new option 'xfuncname' which uses extended regular expressions,
and advertise it _instead_ of funcname. Since most users are on GNU
platforms, the majority of funcname patterns are created and tested there.
Advertising only xfuncname should help to avoid the creation of non-portable
patterns which work with GNU regex but not elsewhere.
Additionally, the extended regular expressions may be less ugly and
complicated compared to the basic RE since many common special operators do
not need to be backslashed.
For example, the GNU Basic RE:
^[ ]*\\(\\(public\\|static\\).*\\)$
becomes the following Extended RE:
^[ ]*((public|static).*)$
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
---
Documentation/gitattributes.txt | 4 ++--
diff.c | 5 +++++
t/t4018-diff-funcname.sh | 2 +-
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 6f3551d..9a75257 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -288,13 +288,13 @@ for paths.
*.tex diff=tex
------------------------
-Then, you would define "diff.tex.funcname" configuration to
+Then, you would define "diff.tex.xfuncname" configuration to
specify a regular expression that matches a line that you would
want to appear as the hunk header, like this:
------------------------
[diff "tex"]
- funcname = "^\\(\\\\\\(sub\\)*section{.*\\)$"
+ xfuncname = "^(\\\\(sub)*section\\{.*)$"
------------------------
Note. A single level of backslashes are eaten by the
diff --git a/diff.c b/diff.c
index 08cdd8f..9d8fd2b 100644
--- a/diff.c
+++ b/diff.c
@@ -196,6 +196,11 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
return config_error_nonbool(var);
return parse_funcname_pattern(var, ep, value,
0);
+ } else if (!strcmp(ep, ".xfuncname")) {
+ if (!value)
+ return config_error_nonbool(var);
+ return parse_funcname_pattern(var, ep, value,
+ REG_EXTENDED);
}
}
}
diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh
index 18bcd97..602d68f 100755
--- a/t/t4018-diff-funcname.sh
+++ b/t/t4018-diff-funcname.sh
@@ -58,7 +58,7 @@ test_expect_success 'last regexp must not be negated' '
'
test_expect_success 'alternation in pattern' '
- git config diff.java.funcname "^[ ]*\\(\\(public\\|static\\).*\\)$"
+ git config diff.java.xfuncname "^[ ]*((public|static).*)$" &&
git diff --no-index Beer.java Beer-correct.java |
grep "^@@.*@@ public static void main("
'
--
1.6.0.1.244.gdc19
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
[not found] <7vskry1485.fsf@gitster.siamese.dyndns.org>
` (2 preceding siblings ...)
2008-09-18 22:44 ` [PATCH v2 3/4] diff.*.xfuncname which uses "extended" regex's for hunk header selection Brandon Casey
@ 2008-09-18 22:47 ` Brandon Casey
2008-09-18 22:59 ` Brandon Casey
` (3 more replies)
3 siblings, 4 replies; 26+ messages in thread
From: Brandon Casey @ 2008-09-18 22:47 UTC (permalink / raw)
To: Junio C Hamano
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
The 'non-GNU' part of this basic RE to extended RE conversion means '\\s' was
converted to ' '.
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
---
diff.c | 35 ++++++++++++++++++-----------------
1 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/diff.c b/diff.c
index 9d8fd2b..c74f1a3 100644
--- a/diff.c
+++ b/diff.c
@@ -1404,23 +1404,24 @@ static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
}
static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
- { "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$", 0 },
- { "html", "^\\s*\\(<[Hh][1-6]\\s.*>.*\\)$", 0 },
- { "java", "!^[ ]*\\(catch\\|do\\|for\\|if\\|instanceof\\|"
- "new\\|return\\|switch\\|throw\\|while\\)\n"
- "^[ ]*\\(\\([ ]*"
- "[A-Za-z_][A-Za-z_0-9]*\\)\\{2,\\}"
- "[ ]*([^;]*\\)$", 0 },
- { "pascal", "^\\(\\(procedure\\|function\\|constructor\\|"
- "destructor\\|interface\\|implementation\\|"
- "initialization\\|finalization\\)[ \t]*.*\\)$"
- "\\|"
- "^\\(.*=[ \t]*\\(class\\|record\\).*\\)$",
- 0 },
- { "php", "^[\t ]*\\(\\(function\\|class\\).*\\)", 0 },
- { "python", "^\\s*\\(\\(class\\|def\\)\\s.*\\)$", 0 },
- { "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$", 0 },
- { "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$", 0 },
+ { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#\\}\\{~%]*).*$", REG_EXTENDED },
+ { "html", "^ *(<[Hh][1-6] .*>.*)$", REG_EXTENDED },
+ { "java", "!^[ ]*(catch|do|for|if|instanceof|"
+ "new|return|switch|throw|while)\n"
+ "^[ ]*(([ ]*"
+ "[A-Za-z_][A-Za-z_0-9]*){2,}"
+ "[ ]*\\([^;]*)$", REG_EXTENDED },
+ { "pascal", "^((procedure|function|constructor|"
+ "destructor|interface|implementation|"
+ "initialization|finalization)[ \t]*.*)$"
+ "|"
+ "^(.*=[ \t]*(class|record).*)$",
+ REG_EXTENDED },
+ { "php", "^[\t ]*((function|class).*)", REG_EXTENDED },
+ { "python", "^ *((class|def)\\s.*)$", REG_EXTENDED },
+ { "ruby", "^ *((class|module|def) .*)$", REG_EXTENDED },
+ { "tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
+ REG_EXTENDED },
};
static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)
--
1.6.0.1.244.gdc19
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-18 22:47 ` [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax Brandon Casey
@ 2008-09-18 22:59 ` Brandon Casey
2008-09-19 15:59 ` Brandon Casey
2008-09-18 23:40 ` Johan Herland
` (2 subsequent siblings)
3 siblings, 1 reply; 26+ messages in thread
From: Brandon Casey @ 2008-09-18 22:59 UTC (permalink / raw)
To: Junio C Hamano
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Brandon Casey wrote:
> + { "java", "!^[ ]*(catch|do|for|if|instanceof|"
> + "new|return|switch|throw|while)\n"
> + "^[ ]*(([ ]*"
> + "[A-Za-z_][A-Za-z_0-9]*){2,}"
I don't understand the last two lines above.
Is it possible for the second bracketed space and tab to match
anything? Wouldn't the first one consume all space and tab?
Assuming it is possible for the second brackets to match
successfully, why would we want to capture this leading
space?
It looks like both of the following lines would match:
' a'
'ab'
but not this
'a'
Would it be better written like:
"^[ ]*(([A-Za-z_][A-Za-z_0-9]*)"
> + "[ ]*\\([^;]*)$", REG_EXTENDED },
<snip>
-brandon
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-18 22:59 ` Brandon Casey
@ 2008-09-19 15:59 ` Brandon Casey
0 siblings, 0 replies; 26+ messages in thread
From: Brandon Casey @ 2008-09-19 15:59 UTC (permalink / raw)
To: Junio C Hamano
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Brandon Casey wrote:
> Brandon Casey wrote:
>
>> + { "java", "!^[ ]*(catch|do|for|if|instanceof|"
>> + "new|return|switch|throw|while)\n"
>> + "^[ ]*(([ ]*"
>> + "[A-Za-z_][A-Za-z_0-9]*){2,}"
>
> I don't understand the last two lines above.
ok, I figured it out.
I was interpretting the {2,} part in
([ ]*[A-Za-z_][A-Za-z_0-9]*){2,}
as requiring two characters, instead of two or more repetitions of
the pattern inside the parenthesis.
-brandon
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-18 22:47 ` [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax Brandon Casey
2008-09-18 22:59 ` Brandon Casey
@ 2008-09-18 23:40 ` Johan Herland
2008-09-19 15:55 ` Brandon Casey
2008-09-19 20:29 ` Junio C Hamano
2008-09-22 8:29 ` Gustaf Hendeby
3 siblings, 1 reply; 26+ messages in thread
From: Johan Herland @ 2008-09-18 23:40 UTC (permalink / raw)
To: Brandon Casey; +Cc: Git Mailing List
On Friday 19 September 2008, Brandon Casey wrote:
> The 'non-GNU' part of this basic RE to extended RE conversion means '\\s'
> was converted to ' '.
Shouldn't that be '[ \t]' instead? At least I would like that for the HTML
pattern.
Have fun!
...Johan
--
Johan Herland, <johan@herland.net>
www.herland.net
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-18 22:47 ` [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax Brandon Casey
2008-09-18 22:59 ` Brandon Casey
2008-09-18 23:40 ` Johan Herland
@ 2008-09-19 20:29 ` Junio C Hamano
2008-09-20 6:58 ` Junio C Hamano
2008-09-20 7:02 ` [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax Junio C Hamano
2008-09-22 8:29 ` Gustaf Hendeby
3 siblings, 2 replies; 26+ messages in thread
From: Junio C Hamano @ 2008-09-19 20:29 UTC (permalink / raw)
To: Brandon Casey
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Brandon Casey <casey@nrlssc.navy.mil> writes:
> The 'non-GNU' part of this basic RE to extended RE conversion means '\\s' was
> converted to ' '.
I think a large part of this series should be in 'maint', as the existing
hunk head pattern engine does _not_ work for people without GNU regexp.
I've created two branches to house this topic:
- bc/maint-diff-hunk-header-fix is rebased to 'maint', so that after
testing we can merge it to 'maint' for 1.6.0.3 and later versions.
Its current tip is at 45d9414;
- bc/master-diff-hunk-header-fix forks from the above, and merges later
"language" additions that happened on 'master'. We can merge this
after testing to 'master' for 1.6.1.
Its current tip is at dde4af4.
Neither has [4/4] on it. I'd like two patches so that:
* [PATCH 1/2] applies to bc/maint-diff-hunk-header-fix, so that the
languages in 1.6.0.2 are fixed for non GNU platforms;
* After applying [1/2] to bc/maint-diff-hunk-header-fix, I'll merge the
branch to bc/master-diff-hunk-header-fix and then...
* [PATCH 2/2] applies on top of it to convert new languages that are
supported only on 'master' to use xfuncname.
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-19 20:29 ` Junio C Hamano
@ 2008-09-20 6:58 ` Junio C Hamano
2008-09-20 21:03 ` Brandon Casey
2008-09-22 23:19 ` [PATCH bc/maint-diff-hunk-header] t4018-diff-funcname: test syntax of builtin xfuncname patterns Brandon Casey
2008-09-20 7:02 ` [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax Junio C Hamano
1 sibling, 2 replies; 26+ messages in thread
From: Junio C Hamano @ 2008-09-20 6:58 UTC (permalink / raw)
To: Brandon Casey
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Junio C Hamano <gitster@pobox.com> writes:
> Neither has [4/4] on it. I'd like two patches so that:
>
> * [PATCH 1/2] applies to bc/maint-diff-hunk-header-fix, so that the
> languages in 1.6.0.2 are fixed for non GNU platforms;
>
> * After applying [1/2] to bc/maint-diff-hunk-header-fix, I'll merge the
> branch to bc/master-diff-hunk-header-fix and then...
>
> * [PATCH 2/2] applies on top of it to convert new languages that are
> supported only on 'master' to use xfuncname.
Here is [1/2] to be applied on top of 45d9414 (diff.*.xfuncname which uses
"extended" regex's for hunk header selection, 2008-09-18).
Testing appreciated.
-- >8 --
Subject: [PATCH] diff: use extended regexp to find hunk headers
Using ERE elements such as "|" (alternation) by backquoting in BRE
is a GNU extension and should not be done in portable programs.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
diff.c | 31 +++++++++++++++++--------------
1 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/diff.c b/diff.c
index dabb4b4..175a044 100644
--- a/diff.c
+++ b/diff.c
@@ -1399,20 +1399,23 @@ static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
}
static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
- { "java", "!^[ ]*\\(catch\\|do\\|for\\|if\\|instanceof\\|"
- "new\\|return\\|switch\\|throw\\|while\\)\n"
- "^[ ]*\\(\\([ ]*"
- "[A-Za-z_][A-Za-z_0-9]*\\)\\{2,\\}"
- "[ ]*([^;]*\\)$", 0 },
- { "pascal", "^\\(\\(procedure\\|function\\|constructor\\|"
- "destructor\\|interface\\|implementation\\|"
- "initialization\\|finalization\\)[ \t]*.*\\)$"
- "\\|"
- "^\\(.*=[ \t]*\\(class\\|record\\).*\\)$",
- 0 },
- { "bibtex", "\\(@[a-zA-Z]\\{1,\\}[ \t]*{\\{0,1\\}[ \t]*[^ \t\"@',\\#}{~%]*\\).*$", 0 },
- { "tex", "^\\(\\\\\\(\\(sub\\)*section\\|chapter\\|part\\)\\*\\{0,1\\}{.*\\)$", 0 },
- { "ruby", "^\\s*\\(\\(class\\|module\\|def\\)\\s.*\\)$", 0 },
+ { "java",
+ "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
+ "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
+ REG_EXTENDED },
+ { "pascal",
+ "^((procedure|function|constructor|destructor|interface|"
+ "implementation|initialization|finalization)[ \t]*.*)$"
+ "|"
+ "^(.*=[ \t]*(class|record).*)$",
+ REG_EXTENDED },
+ { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
+ REG_EXTENDED },
+ { "tex",
+ "^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$",
+ REG_EXTENDED },
+ { "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
+ REG_EXTENDED },
};
static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-20 6:58 ` Junio C Hamano
@ 2008-09-20 21:03 ` Brandon Casey
2008-09-20 22:29 ` Junio C Hamano
2008-09-22 23:19 ` [PATCH bc/maint-diff-hunk-header] t4018-diff-funcname: test syntax of builtin xfuncname patterns Brandon Casey
1 sibling, 1 reply; 26+ messages in thread
From: Brandon Casey @ 2008-09-20 21:03 UTC (permalink / raw)
To: git
Junio C Hamano <gitster <at> pobox.com> writes:
> Here is [1/2] to be applied on top of 45d9414 (diff.*.xfuncname which uses
> "extended" regex's for hunk header selection, 2008-09-18).
>
> Testing appreciated.
> + { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
> + REG_EXTENDED },
> + { "tex",
> + "^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$",
I think you need double backslash '\\' before '{' in the two places in these
patterns where you only have a single backslash.
-brandon
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-20 21:03 ` Brandon Casey
@ 2008-09-20 22:29 ` Junio C Hamano
2008-09-22 16:59 ` Brandon Casey
0 siblings, 1 reply; 26+ messages in thread
From: Junio C Hamano @ 2008-09-20 22:29 UTC (permalink / raw)
To: Brandon Casey; +Cc: git
Brandon Casey <drafnel@gmail.com> writes:
> Junio C Hamano <gitster <at> pobox.com> writes:
>
>> Here is [1/2] to be applied on top of 45d9414 (diff.*.xfuncname which uses
>> "extended" regex's for hunk header selection, 2008-09-18).
>>
>> Testing appreciated.
>
>> + { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
>> + REG_EXTENDED },
>> + { "tex",
>> + "^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$",
>
> I think you need double backslash '\\' before '{' in the two places in these
> patterns where you only have a single backslash.
Thanks. Any other nits?
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-20 22:29 ` Junio C Hamano
@ 2008-09-22 16:59 ` Brandon Casey
2008-09-24 0:04 ` Brandon Casey
0 siblings, 1 reply; 26+ messages in thread
From: Brandon Casey @ 2008-09-22 16:59 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano wrote:
> Brandon Casey <drafnel@gmail.com> writes:
>
>> Junio C Hamano <gitster <at> pobox.com> writes:
>>
>>> Here is [1/2] to be applied on top of 45d9414 (diff.*.xfuncname which uses
>>> "extended" regex's for hunk header selection, 2008-09-18).
>>>
>>> Testing appreciated.
>>> + { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
>>> + REG_EXTENDED },
>>> + { "tex",
>>> + "^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$",
>> I think you need double backslash '\\' before '{' in the two places in these
>> patterns where you only have a single backslash.
>
> Thanks. Any other nits?
Just a single minor one on the "fix multiple regexp semantics" patch.
This:
for (i = 0; i < regs->nr; i++) {
...
}
+ if (regs->nr <= i)
makes me use my brain (I try to avoid that).
I only mention it since I've seen discussions in the past about this sort
of ordering, and you seem to have accepted that it can be confusing to
native english speakers who would "read" the 'if' statement. The ordering
above puts emphasis on the value of regs->nr, when it is actually the value
of 'i' that is being tested (since regs->nr is unchanging).
I'll do some testing on non-GNU platforms today.
-brandon
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-22 16:59 ` Brandon Casey
@ 2008-09-24 0:04 ` Brandon Casey
2008-09-26 17:49 ` Brandon Casey
0 siblings, 1 reply; 26+ messages in thread
From: Brandon Casey @ 2008-09-24 0:04 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Brandon Casey wrote:
> I'll do some testing on non-GNU platforms today.
Well, all I've done is compile, and it compiles and runs the
tests correctly. Thought I'd let you know I did at least that.
-brandon
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-24 0:04 ` Brandon Casey
@ 2008-09-26 17:49 ` Brandon Casey
2008-09-29 21:52 ` [PATCH] diff.c: remove duplicate bibtex pattern introduced by merge 92bb9785 Brandon Casey
0 siblings, 1 reply; 26+ messages in thread
From: Brandon Casey @ 2008-09-26 17:49 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: Junio C Hamano, git
Shawn,
When you get around to merging this series into maint and master,
you'll probably want to redo the merge of Junio's
bc/maint-diff-hunk-header-fix into bc/master-diff-hunk-header-fix.
After applying 'diff hunkpattern: fix misconverted "\{" tex macro introducers'
to bc/maint-diff-hunk-header-fix, he made a mistake when merging
96d1a8e9 into his bc/master-diff-hunk-header-fix which was at 3d8dccd7 to
produce 92bb9785. (gitk fdac6692 makes this easier to see. fdac6692 should
be the current tip of bc/master-diff-hunk...)
The resulting diff.c in 92bb9785, contains _two_ bibtex patterns, one fixed
by the 'diff hunkpattern:...' patch, and one unfixed. The broken bibtex
pattern was eventually fixed, but the duplicate pattern is still there on
the tip of that branch and in next. It would be nice if the merge could be
redone.
Here are the commands, since sometimes it makes more sense this way:
git branch bc/maint-diff-hunk-header-fix 96d1a8e9
git checkout -b bc/master-diff-hunk-header-fix 3d8dccd7
git merge bc/maint-diff-hunk-header-fix
# fix conflict, make sure you choose the right bibtex pattern
# and delete the other. The builtin-funcname patterns were
# also alphabetized on the master branch, so the correct bibtex
# should be moved to the first entry.
# then reapply the other two patches
git checkout bc/maint-diff-hunk-header-fix
git cherry-pick e3bf5e43
git checkout bc/master-diff-hunk-header-fix
git merge bc/maint-diff-hunk-header-fix
git cherry-pick fdac6692
git commit --amend
# Remove Junio's comment about 'fixes bibtex pattern breakage exposed
# by this test'
-brandon
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH] diff.c: remove duplicate bibtex pattern introduced by merge 92bb9785
2008-09-26 17:49 ` Brandon Casey
@ 2008-09-29 21:52 ` Brandon Casey
0 siblings, 0 replies; 26+ messages in thread
From: Brandon Casey @ 2008-09-29 21:52 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: Git Mailing List
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
---
diff.c | 2 --
1 files changed, 0 insertions(+), 2 deletions(-)
diff --git a/diff.c b/diff.c
index b001d7b..7c982b4 100644
--- a/diff.c
+++ b/diff.c
@@ -1439,8 +1439,6 @@ static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
{ "python", "^[ \t]*((class|def)[ \t].*)$", REG_EXTENDED },
{ "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
REG_EXTENDED },
- { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
- REG_EXTENDED },
{ "tex",
"^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
REG_EXTENDED },
--
1.6.0.2.323.g7c850
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH bc/maint-diff-hunk-header] t4018-diff-funcname: test syntax of builtin xfuncname patterns
2008-09-20 6:58 ` Junio C Hamano
2008-09-20 21:03 ` Brandon Casey
@ 2008-09-22 23:19 ` Brandon Casey
2008-09-22 23:26 ` [PATCH bc/master-diff-hunk-header] " Brandon Casey
1 sibling, 1 reply; 26+ messages in thread
From: Brandon Casey @ 2008-09-22 23:19 UTC (permalink / raw)
To: Junio C Hamano
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
---
How about something like this on top of your ERE conversion patch.
It will test that regcomp() completes successfully.
-brandon
t/t4018-diff-funcname.sh | 11 +++++++++++
1 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh
index 602d68f..76919a4 100755
--- a/t/t4018-diff-funcname.sh
+++ b/t/t4018-diff-funcname.sh
@@ -32,7 +32,18 @@ EOF
sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java
+builtin_patterns="bibtex java pascal ruby tex"
+for p in $builtin_patterns
+do
+ test_expect_success "builtin $p pattern compiles" '
+ echo "*.java diff=$p" > .gitattributes &&
+ git diff --no-index Beer.java Beer-correct.java 2>&1 |
+ test_must_fail grep "fatal" > /dev/null
+ '
+done
+
test_expect_success 'default behaviour' '
+ rm -f .gitattributes &&
git diff --no-index Beer.java Beer-correct.java |
grep "^@@.*@@ public class Beer"
'
--
1.6.0.1.244.gdc19
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH bc/master-diff-hunk-header] t4018-diff-funcname: test syntax of builtin xfuncname patterns
2008-09-22 23:19 ` [PATCH bc/maint-diff-hunk-header] t4018-diff-funcname: test syntax of builtin xfuncname patterns Brandon Casey
@ 2008-09-22 23:26 ` Brandon Casey
2008-09-23 0:49 ` [PATCH] diff funcname_pattern: Allow HTML header tags without attributes Johan Herland
0 siblings, 1 reply; 26+ messages in thread
From: Brandon Casey @ 2008-09-22 23:26 UTC (permalink / raw)
To: Junio C Hamano
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Signed-off-by: Brandon Casey <casey@nrlssc.navy.mil>
---
And then this goes on top of bc/master-diff-hunk-header once
bc/maint-diff-hunk-header with the previous patch is merged in.
-brandon
t/t4018-diff-funcname.sh | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/t/t4018-diff-funcname.sh b/t/t4018-diff-funcname.sh
index 76919a4..5b58f50 100755
--- a/t/t4018-diff-funcname.sh
+++ b/t/t4018-diff-funcname.sh
@@ -32,7 +32,7 @@ EOF
sed 's/beer\\/beer,\\/' < Beer.java > Beer-correct.java
-builtin_patterns="bibtex java pascal ruby tex"
+builtin_patterns="bibtex html java pascal php python ruby tex"
for p in $builtin_patterns
do
test_expect_success "builtin $p pattern compiles" '
--
1.6.0.1.244.gdc19
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH] diff funcname_pattern: Allow HTML header tags without attributes
2008-09-22 23:26 ` [PATCH bc/master-diff-hunk-header] " Brandon Casey
@ 2008-09-23 0:49 ` Johan Herland
2008-09-23 1:46 ` Junio C Hamano
0 siblings, 1 reply; 26+ messages in thread
From: Johan Herland @ 2008-09-23 0:49 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Brandon Casey
Signed-off-by: Johan Herland <johan@herland.net>
---
On Tuesday 23 September 2008, Brandon Casey wrote:
> And then this goes on top of bc/master-diff-hunk-header once
> bc/maint-diff-hunk-header with the previous patch is merged in.
After looking over this once more, I think the HTML regexp should be
changed as follows. This fixes a buglet that was part of my original
HTML pattern, and although this patch textually depends on Brandon's
work, it is conceptually independent of his refactorization.
Have fun!
...Johan
diff.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/diff.c b/diff.c
index d0e7319..c8b72f4 100644
--- a/diff.c
+++ b/diff.c
@@ -1424,7 +1424,7 @@ static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
REG_EXTENDED },
- { "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
+ { "html", "^[ \t]*(<[Hh][1-6]([ \t].*)?>.*)$", REG_EXTENDED },
{ "java",
"!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
"^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
--
1.6.0.2.405.g3cc38
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH] diff funcname_pattern: Allow HTML header tags without attributes
2008-09-23 0:49 ` [PATCH] diff funcname_pattern: Allow HTML header tags without attributes Johan Herland
@ 2008-09-23 1:46 ` Junio C Hamano
2008-09-23 2:05 ` Johan Herland
0 siblings, 1 reply; 26+ messages in thread
From: Junio C Hamano @ 2008-09-23 1:46 UTC (permalink / raw)
To: Johan Herland; +Cc: git, Brandon Casey
Johan Herland <johan@herland.net> writes:
> After looking over this once more, I think the HTML regexp should be
> changed as follows. This fixes a buglet that was part of my original
> HTML pattern, and although this patch textually depends on Brandon's
> work, it is conceptually independent of his refactorization.
> ...
> - { "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
> + { "html", "^[ \t]*(<[Hh][1-6]([ \t].*)?>.*)$", REG_EXTENDED },
I do not think these two particularly would make much difference. Why
isn't it simply...
"<[Hh][1-6].*"
without even any capture or anchor?
It would falsely hit oddball cases like <h1foo> which is not <h1>, but
anybody who uses such a nonstandard thing deserves it, imnvho ;-).
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH] diff funcname_pattern: Allow HTML header tags without attributes
2008-09-23 1:46 ` Junio C Hamano
@ 2008-09-23 2:05 ` Johan Herland
0 siblings, 0 replies; 26+ messages in thread
From: Johan Herland @ 2008-09-23 2:05 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Brandon Casey
On Tuesday 23 September 2008, Junio C Hamano wrote:
> Johan Herland <johan@herland.net> writes:
> > After looking over this once more, I think the HTML regexp should be
> > changed as follows. This fixes a buglet that was part of my original
> > HTML pattern, and although this patch textually depends on Brandon's
> > work, it is conceptually independent of his refactorization.
> > ...
> > - { "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
> > + { "html", "^[ \t]*(<[Hh][1-6]([ \t].*)?>.*)$", REG_EXTENDED },
>
> I do not think these two particularly would make much difference. Why
> isn't it simply...
>
> "<[Hh][1-6].*"
>
> without even any capture or anchor?
>
> It would falsely hit oddball cases like <h1foo> which is not <h1>, but
> anybody who uses such a nonstandard thing deserves it, imnvho ;-).
Ok. I agree. Go ahead.
...Johan
--
Johan Herland, <johan@herland.net>
www.herland.net
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-19 20:29 ` Junio C Hamano
2008-09-20 6:58 ` Junio C Hamano
@ 2008-09-20 7:02 ` Junio C Hamano
2008-09-20 7:51 ` Junio C Hamano
1 sibling, 1 reply; 26+ messages in thread
From: Junio C Hamano @ 2008-09-20 7:02 UTC (permalink / raw)
To: Brandon Casey
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Junio C Hamano <gitster@pobox.com> writes:
> Neither has [4/4] on it. I'd like two patches so that:
>
> * [PATCH 1/2] applies to bc/maint-diff-hunk-header-fix, so that the
> languages in 1.6.0.2 are fixed for non GNU platforms;
>
> * After applying [1/2] to bc/maint-diff-hunk-header-fix, I'll merge the
> branch to bc/master-diff-hunk-header-fix and then...
>
> * [PATCH 2/2] applies on top of it to convert new languages that are
> supported only on 'master' to use xfuncname.
If you apply the previous one on top of 45d9414 (diff.*.xfuncname which
uses "extended" regex's for hunk header selection, 2008-09-18), and then
merge the result to dde4af4 (Merge branch 'bc/maint-diff-hunk-header-fix'
into bc/master-diff-hunk-header-fix, 2008-09-18), you will get some
trivial conflicts. This patch is [2/2] that applies on top of that merge,
and should result in this:
static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
REG_EXTENDED },
{ "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
{ "java",
"!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
"^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
REG_EXTENDED },
{ "pascal",
"^((procedure|function|constructor|destructor|interface|"
"implementation|initialization|finalization)[ \t]*.*)$"
"|"
"^(.*=[ \t]*(class|record).*)$",
REG_EXTENDED },
{ "php", "^[\t ]*((function|class).*)", REG_EXTENDED },
{ "python", "^[ \t]*((class|def)[ \t].*)$", REG_EXTENDED },
{ "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
REG_EXTENDED },
{ "tex",
"^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$",
REG_EXTENDED },
};
-- >8 --
Subject: [PATCH] diff: use extended regexp to find hunk headers
Using ERE elements such as "|" (alternation) by backquoting in BRE
is a GNU extension and should not be done in portable programs.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
diff.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/diff.c b/diff.c
index 5b9b074..a733010 100644
--- a/diff.c
+++ b/diff.c
@@ -1406,7 +1406,7 @@ static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
REG_EXTENDED },
- { "html", "^\\s*\\(<[Hh][1-6]\\s.*>.*\\)$", 0 },
+ { "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
{ "java",
"!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
"^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
@@ -1417,8 +1417,8 @@ static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
"|"
"^(.*=[ \t]*(class|record).*)$",
REG_EXTENDED },
- { "php", "^[\t ]*\\(\\(function\\|class\\).*\\)", 0 },
- { "python", "^\\s*\\(\\(class\\|def\\)\\s.*\\)$", 0 },
+ { "php", "^[\t ]*((function|class).*)", REG_EXTENDED },
+ { "python", "^[ \t]*((class|def)[ \t].*)$", REG_EXTENDED },
{ "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
REG_EXTENDED },
{ "tex",
--
1.6.0.2.414.g0bf11
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-20 7:02 ` [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax Junio C Hamano
@ 2008-09-20 7:51 ` Junio C Hamano
0 siblings, 0 replies; 26+ messages in thread
From: Junio C Hamano @ 2008-09-20 7:51 UTC (permalink / raw)
To: Brandon Casey
Cc: Arjen Laarhoven, Mike Ralphson, Johannes Sixt, Jeff King,
Boyd Lynn Gerber, Git Mailing List, Avery Pennarun, Johan Herland,
Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Gustaf Hendeby, Jonathan del Strother
Junio C Hamano <gitster@pobox.com> writes:
> static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
> { "bibtex", "(@[a-zA-Z]{1,}[ \t]*\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
> REG_EXTENDED },
> { "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
> { "java",
> "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
> "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
> REG_EXTENDED },
> { "pascal",
> "^((procedure|function|constructor|destructor|interface|"
> "implementation|initialization|finalization)[ \t]*.*)$"
> "|"
> "^(.*=[ \t]*(class|record).*)$",
> REG_EXTENDED },
> { "php", "^[\t ]*((function|class).*)", REG_EXTENDED },
> { "python", "^[ \t]*((class|def)[ \t].*)$", REG_EXTENDED },
> { "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
> REG_EXTENDED },
> { "tex",
> "^(\\\\((sub)*section|chapter|part)\\*{0,1}\{.*)$",
> REG_EXTENDED },
> };
On the "Objective-C hunk header" topic, I mentioned that I wondered how a
construct like:
"( A | B ) | ( C | D )"
could possibly work, given that xdiff-interface.c::ff_regexp() uses a
regmatch_t array with only two elements to capture (which means it can
only capture $0 and $1).
It turns out that the function is not really prepared to do this
properly. In order to cope with a pattern without _any_ capturing
parentheses in the regexp, it has a trick to use $0 if $1 is undefined.
In other words, if you write:
"junk ( A | B ) | garbage ( C | D )"
then lines "junk A", "junk B", "garbage C" and "garbage D" would all match
and be on the hunk header line. However, "garbage" is not stripped out
(while "junk" does get left out of the capture).
This happens not to be a problem with any of the above built-in patterns.
The risky one is "pascal", but it's $2 captures the entire line anyway, so
it does not make any difference if the code used $0 instead. E.g. a line
that matches the second alternative, "foo = class bar", will be captured
as $0 and this is the same as (un)captured $2.
But it would be a good idea to fix this while our attention to the issue
is still fresh. As we discussed earlier, we can replace the top-level "|"
with "\n" and fix the semantics of multiple positive regexp to grab $1 (or
$0 if $1 is unavaialble) of the first positive one. If we did so, when
the pattern
"junk ( A | B ) | garbage ( C | D )"
matches a line "garbage C", the initial garbage part will not be included
inside the capture.
Here is such a proposed fix.
-- >8 --
diff: fix "multiple regexp" semantics to find hunk header comment
When multiple regular expressions are concatenated with "\n", they were
traditionally AND'ed together, and only a line that matches _all_ of them
were taken as a match. This however is unwieldy when multiple regexp
feature is used to specify alternatives.
This fixes the semantics to take the first match. A nagative pattern, if
matches, makes the line to fail. A positive pattern, if matches, will be
the final match and what it captures in $1 is used as the hunk header
comment.
We could write alternatives using "|" in ERE, but the machinery can only
use captured $1 as the hunk header comment (or $0 if there is no match in
$1), so you cannot write:
"junk ( A | B ) | garbage ( C | D )"
and expect both "junk" and "garbage" to get stripped with the existing
code. With this fix, you can write it as:
"junk ( A | B ) \n garbage ( C | D )"
and the way capture works would match the user expectation more
naturally.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
diff.c | 2 +-
xdiff-interface.c | 17 ++++++++++-------
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git c/diff.c w/diff.c
index a733010..1bcbbd5 100644
--- c/diff.c
+++ w/diff.c
@@ -1414,7 +1414,7 @@ static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
{ "pascal",
"^((procedure|function|constructor|destructor|interface|"
"implementation|initialization|finalization)[ \t]*.*)$"
- "|"
+ "\n"
"^(.*=[ \t]*(class|record).*)$",
REG_EXTENDED },
{ "php", "^[\t ]*((function|class).*)", REG_EXTENDED },
diff --git c/xdiff-interface.c w/xdiff-interface.c
index 7f1a7d3..6c6bb19 100644
--- c/xdiff-interface.c
+++ w/xdiff-interface.c
@@ -194,26 +194,29 @@ static long ff_regexp(const char *line, long len,
char *line_buffer = xstrndup(line, len); /* make NUL terminated */
struct ff_regs *regs = priv;
regmatch_t pmatch[2];
- int result = 0, i;
+ int i;
+ int result = -1;
for (i = 0; i < regs->nr; i++) {
struct ff_reg *reg = regs->array + i;
- if (reg->negate ^ !!regexec(®->re,
- line_buffer, 2, pmatch, 0)) {
- free(line_buffer);
- return -1;
+ if (!regexec(®->re, line_buffer, 2, pmatch, 0)) {
+ if (reg->negate)
+ goto fail;
+ break;
}
}
+ if (regs->nr <= i)
+ goto fail;
i = pmatch[1].rm_so >= 0 ? 1 : 0;
line += pmatch[i].rm_so;
result = pmatch[i].rm_eo - pmatch[i].rm_so;
if (result > buffer_size)
result = buffer_size;
else
- while (result > 0 && (isspace(line[result - 1]) ||
- line[result - 1] == '\n'))
+ while (result > 0 && (isspace(line[result - 1])))
result--;
memcpy(buffer, line, result);
+ fail:
free(line_buffer);
return result;
}
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax
2008-09-18 22:47 ` [PATCH v2 4/4] diff.c: convert builtin funcname patterns to non-GNU extended regex syntax Brandon Casey
` (2 preceding siblings ...)
2008-09-19 20:29 ` Junio C Hamano
@ 2008-09-22 8:29 ` Gustaf Hendeby
3 siblings, 0 replies; 26+ messages in thread
From: Gustaf Hendeby @ 2008-09-22 8:29 UTC (permalink / raw)
To: Brandon Casey
Cc: Junio C Hamano, Arjen Laarhoven, Mike Ralphson, Johannes Sixt,
Jeff King, Boyd Lynn Gerber, Git Mailing List, Avery Pennarun,
Johan Herland, Andreas Ericsson, Kirill Smelkov, Giuseppe Bilotta,
Jonathan del Strother
On 09/19/2008 12:47 AM, Brandon Casey wrote:
> The 'non-GNU' part of this basic RE to extended RE conversion means '\\s' was
> converted to ' '.
I've tested the converted BibTeX pattern and it seems to work as expected.
/Gustaf
^ permalink raw reply [flat|nested] 26+ messages in thread