* [PATCH 1/2] grep -w: forward to next possible position after rejected match
@ 2009-01-09 23:08 René Scharfe
2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe
0 siblings, 1 reply; 12+ messages in thread
From: René Scharfe @ 2009-01-09 23:08 UTC (permalink / raw)
To: Git Mailing List; +Cc: Junio C Hamano
grep -w accepts matches between non-word characters, only. If a match
from regexec() doesn't meet this criterion, grep continues its search
after the first character of that match.
We can be a bit smarter here and skip all positions that follow a word
character first, as they can't match our criteria. This way we can
consume characters quite cheaply and don't need to special-case the
handling of the beginning of a line.
Here's a contrived example command on msysgit (best of five runs):
$ time git grep -w ...... v1.6.1 >/dev/null
real 0m1.611s
user 0m0.000s
sys 0m0.015s
With the patch it's quite a bit faster:
$ time git grep -w ...... v1.6.1 >/dev/null
real 0m1.179s
user 0m0.000s
sys 0m0.015s
More common search patterns will gain a lot less, but it's a nice
cleanup anyway.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
grep.c | 11 +++++++----
1 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/grep.c b/grep.c
index 49e9319..394703b 100644
--- a/grep.c
+++ b/grep.c
@@ -294,7 +294,6 @@ static struct {
static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
{
int hit = 0;
- int at_true_bol = 1;
int saved_ch = 0;
regmatch_t pmatch[10];
@@ -337,7 +336,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol
* either end of the line, or at word boundary
* (i.e. the next char must not be a word char).
*/
- if ( ((pmatch[0].rm_so == 0 && at_true_bol) ||
+ if ( ((pmatch[0].rm_so == 0) ||
!word_char(bol[pmatch[0].rm_so-1])) &&
((pmatch[0].rm_eo == (eol-bol)) ||
!word_char(bol[pmatch[0].rm_eo])) )
@@ -349,10 +348,14 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol
/* There could be more than one match on the
* line, and the first match might not be
* strict word match. But later ones could be!
+ * Forward to the next possible start, i.e. the
+ * next position following a non-word char.
*/
bol = pmatch[0].rm_so + bol + 1;
- at_true_bol = 0;
- goto again;
+ while (word_char(bol[-1]) && bol < eol)
+ bol++;
+ if (bol < eol)
+ goto again;
}
}
if (p->token == GREP_PATTERN_HEAD && saved_ch)
--
1.6.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/2] grep: don't call regexec() for fixed strings
2009-01-09 23:08 [PATCH 1/2] grep -w: forward to next possible position after rejected match René Scharfe
@ 2009-01-09 23:18 ` René Scharfe
2009-01-10 20:37 ` Junio C Hamano
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: René Scharfe @ 2009-01-09 23:18 UTC (permalink / raw)
To: Git Mailing List; +Cc: Junio C Hamano
Add the new flag "fixed" to struct grep_pat and set it if the pattern
doesn't contain any regex control characters, in addition to when the
flag -F/--fixed-strings was specified.
This gives a nice speed up on msysgit, where regexec() seems to be
extra slow. Before (best of five runs):
$ time git grep grep v1.6.1 >/dev/null
real 0m0.552s
user 0m0.000s
sys 0m0.000s
$ time git grep -F grep v1.6.1 >/dev/null
real 0m0.170s
user 0m0.000s
sys 0m0.015s
With the patch:
$ time git grep grep v1.6.1 >/dev/null
real 0m0.173s
user 0m0.000s
sys 0m0.000s
The difference is much smaller on Linux, but still measurable.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
grep.c | 29 +++++++++++++++++++++++++----
grep.h | 1 +
2 files changed, 26 insertions(+), 4 deletions(-)
diff --git a/grep.c b/grep.c
index 394703b..a1092df 100644
--- a/grep.c
+++ b/grep.c
@@ -28,9 +28,31 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat,
p->next = NULL;
}
+static int isregexspecial(int c)
+{
+ return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' ||
+ c == '.' || c == '^' || c == '{' || c == '|';
+}
+
+static int is_fixed(const char *s)
+{
+ while (!isregexspecial(*s))
+ s++;
+ return !*s;
+}
+
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
- int err = regcomp(&p->regexp, p->pattern, opt->regflags);
+ int err;
+
+ if (opt->fixed || is_fixed(p->pattern))
+ p->fixed = 1;
+ if (opt->regflags & REG_ICASE)
+ p->fixed = 0;
+ if (p->fixed)
+ return;
+
+ err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) {
char errbuf[1024];
char where[1024];
@@ -159,8 +181,7 @@ void compile_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
- if (!opt->fixed)
- compile_regexp(p, opt);
+ compile_regexp(p, opt);
break;
default:
opt->extended = 1;
@@ -314,7 +335,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol
}
again:
- if (!opt->fixed) {
+ if (!p->fixed) {
regex_t *exp = &p->regexp;
hit = !regexec(exp, bol, ARRAY_SIZE(pmatch),
pmatch, 0);
diff --git a/grep.h b/grep.h
index 45a222d..5102ce3 100644
--- a/grep.h
+++ b/grep.h
@@ -30,6 +30,7 @@ struct grep_pat {
const char *pattern;
enum grep_header_field field;
regex_t regexp;
+ unsigned fixed:1;
};
enum grep_expr_node {
--
1.6.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings
2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe
@ 2009-01-10 20:37 ` Junio C Hamano
2009-01-12 12:25 ` Mikael Magnusson
2009-01-12 15:32 ` Alex Riesen
2 siblings, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2009-01-10 20:37 UTC (permalink / raw)
To: René Scharfe; +Cc: Git Mailing List
René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:
> Add the new flag "fixed" to struct grep_pat and set it if the pattern
> is doesn't contain any regex control characters in addition to if the
> flag -F/--fixed-strings was specified.
>
> This gives a nice speed up on msysgit, where regexec() seems to be
> extra slow. Before (best of five runs):
Thanks, and...
> static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
> {
> - int err = regcomp(&p->regexp, p->pattern, opt->regflags);
> + int err;
> +
> + if (opt->fixed || is_fixed(p->pattern))
> + p->fixed = 1;
> + if (opt->regflags & REG_ICASE)
> + p->fixed = 0;
... thanks again for being extra careful. That's why I *love* your
patches.
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings
2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe
2009-01-10 20:37 ` Junio C Hamano
@ 2009-01-12 12:25 ` Mikael Magnusson
2009-01-12 13:33 ` Johannes Schindelin
2009-01-12 15:32 ` Alex Riesen
2 siblings, 1 reply; 12+ messages in thread
From: Mikael Magnusson @ 2009-01-12 12:25 UTC (permalink / raw)
To: René Scharfe; +Cc: Git Mailing List, Junio C Hamano
2009/1/10 René Scharfe <rene.scharfe@lsrfire.ath.cx>:
> Add the new flag "fixed" to struct grep_pat and set it if the pattern
> is doesn't contain any regex control characters in addition to if the
> flag -F/--fixed-strings was specified.
>
> diff --git a/grep.c b/grep.c
> index 394703b..a1092df 100644
> --- a/grep.c
> +++ b/grep.c
> @@ -28,9 +28,31 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat,
> p->next = NULL;
> }
>
> +static int isregexspecial(int c)
> +{
> + return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' ||
> + c == '.' || c == '^' || c == '{' || c == '|';
> +}
Shouldn't this include '*' and '\'?
--
Mikael Magnusson
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings
2009-01-12 12:25 ` Mikael Magnusson
@ 2009-01-12 13:33 ` Johannes Schindelin
0 siblings, 0 replies; 12+ messages in thread
From: Johannes Schindelin @ 2009-01-12 13:33 UTC (permalink / raw)
To: Mikael Magnusson; +Cc: René Scharfe, Git Mailing List, Junio C Hamano
[-- Attachment #1: Type: TEXT/PLAIN, Size: 844 bytes --]
Hi,
On Mon, 12 Jan 2009, Mikael Magnusson wrote:
> 2009/1/10 René Scharfe <rene.scharfe@lsrfire.ath.cx>:
> > Add the new flag "fixed" to struct grep_pat and set it if the pattern
> > is doesn't contain any regex control characters in addition to if the
> > flag -F/--fixed-strings was specified.
> >
> > diff --git a/grep.c b/grep.c
> > index 394703b..a1092df 100644
> > --- a/grep.c
> > +++ b/grep.c
> > @@ -28,9 +28,31 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat,
> > p->next = NULL;
> > }
> >
> > +static int isregexspecial(int c)
> > +{
> > + return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' ||
> > + c == '.' || c == '^' || c == '{' || c == '|';
> > +}
>
> Shouldn't this include '*' and '\'?
This is covered by isspecial(): see ctype.c.
Hth,
Dscho
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings
2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe
2009-01-10 20:37 ` Junio C Hamano
2009-01-12 12:25 ` Mikael Magnusson
@ 2009-01-12 15:32 ` Alex Riesen
2009-01-12 19:18 ` René Scharfe
2 siblings, 1 reply; 12+ messages in thread
From: Alex Riesen @ 2009-01-12 15:32 UTC (permalink / raw)
To: René Scharfe; +Cc: Git Mailing List, Junio C Hamano
2009/1/10 René Scharfe <rene.scharfe@lsrfire.ath.cx>:
> +static int isregexspecial(int c)
> +{
> + return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' ||
> + c == '.' || c == '^' || c == '{' || c == '|';
> +}
> +
> +static int is_fixed(const char *s)
> +{
> + while (!isregexspecial(*s))
> + s++;
> + return !*s;
> +}
strchr?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings
2009-01-12 15:32 ` Alex Riesen
@ 2009-01-12 19:18 ` René Scharfe
2009-01-13 8:13 ` Junio C Hamano
0 siblings, 1 reply; 12+ messages in thread
From: René Scharfe @ 2009-01-12 19:18 UTC (permalink / raw)
To: Alex Riesen; +Cc: Git Mailing List, Junio C Hamano
Alex Riesen schrieb:
> 2009/1/10 René Scharfe <rene.scharfe@lsrfire.ath.cx>:
>> +static int isregexspecial(int c)
>> +{
>> + return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' ||
>> + c == '.' || c == '^' || c == '{' || c == '|';
>> +}
>> +
>> +static int is_fixed(const char *s)
>> +{
>> + while (!isregexspecial(*s))
>> + s++;
>> + return !*s;
>> +}
>
> strchr?
Oh, yes, that would look nicer.
Another option is to extend ctype.c and implement isregexspecial() --
and while we're at it islowerxdigit() (builtin-name-rev.c::ishex()) and
iswordchar() (config.c::iskeychar(), grep.c::word_char()), too -- as
table lookups. I.e., something like the following (untested).
Which of the mentioned functions are really worthy of this promotion?
The isregexspecial() char class has more members than isspecial(), but
it's not performance critical (unless you have a lot of patterns and
only a small amount of data to grep :).
Are there more candidates for ctype-ification?
René
ctype.c | 14 ++++++++++----
git-compat-util.h | 6 ++++++
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/ctype.c b/ctype.c
index 9208d67..1a76586 100644
--- a/ctype.c
+++ b/ctype.c
@@ -10,20 +10,26 @@
#undef AA
#undef DD
#undef GS
+#undef RR
+#undef US
+#undef Ah
#define SS GIT_SPACE
#define AA GIT_ALPHA
#define DD GIT_DIGIT
#define GS GIT_SPECIAL /* \0, *, ?, [, \\ */
+#define RR GIT_REGEX_SPECIAL /* $, (, ), +, ., ^, {, | */
+#define US GIT_UNDERSCORE
+#define Ah (GIT_ALPHA | GIT_LOWER_XDIGIT)
unsigned char sane_ctype[256] = {
GS, 0, 0, 0, 0, 0, 0, 0, 0, SS, SS, 0, 0, SS, 0, 0, /* 0-15 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-15 */
- SS, 0, 0, 0, 0, 0, 0, 0, 0, 0, GS, 0, 0, 0, 0, 0, /* 32-15 */
+ SS, 0, 0, 0, RR, 0, 0, 0, RR, RR, GS, RR, 0, 0, RR, 0, /* 32-15 */
DD, DD, DD, DD, DD, DD, DD, DD, DD, DD, 0, 0, 0, 0, 0, GS, /* 48-15 */
0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 64-15 */
- AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, GS, GS, 0, 0, 0, /* 80-15 */
- 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 96-15 */
- AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, 0, 0, 0, 0, 0, /* 112-15 */
+ AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, GS, GS, 0, RR, US, /* 80-15 */
+ 0, Ah, Ah, Ah, Ah, Ah, Ah, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 96-15 */
+ AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, RR, RR, 0, 0, 0, /* 112-15 */
/* Nothing in the 128.. range */
};
diff --git a/git-compat-util.h b/git-compat-util.h
index e20b1e8..5eaa662 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -328,12 +328,18 @@ extern unsigned char sane_ctype[256];
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
+#define GIT_UNDERSCORE 0x20
+#define GIT_LOWER_XDIGIT 0x40
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define isspecial(x) sane_istest(x,GIT_SPECIAL)
+#define isregexspecial(x) sane_istest(x,GIT_SPECIAL | GIT_REGEX_SPECIAL)
+#define iswordchar(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT | GIT_UNDERSCORE)
+#define islowerxdigit(x) sane_istest(x,GIT_DIGIT | GIT_LOWER_XDIGIT)
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings
2009-01-12 19:18 ` René Scharfe
@ 2009-01-13 8:13 ` Junio C Hamano
2009-01-17 15:50 ` [PATCH 1/4] Add ctype test René Scharfe
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Junio C Hamano @ 2009-01-13 8:13 UTC (permalink / raw)
To: René Scharfe; +Cc: Alex Riesen, Git Mailing List
René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:
> +#define RR GIT_REGEX_SPECIAL /* $, (, ), +, ., ^, {, | */
> +#define US GIT_UNDERSCORE
> +#define Ah (GIT_ALPHA | GIT_LOWER_XDIGIT)
>
> unsigned char sane_ctype[256] = {
> GS, 0, 0, 0, 0, 0, 0, 0, 0, SS, SS, 0, 0, SS, 0, 0, /* 0-15 */
Mental note. NUL is marked as GIT_SPECIAL.
> #define isspecial(x) sane_istest(x,GIT_SPECIAL)
> +#define isregexspecial(x) sane_istest(x,GIT_SPECIAL | GIT_REGEX_SPECIAL)
Perhaps isspecial() is misnamed if we were to enhance git-ctype in this
way. It is about a byte being shell glob pattern or a NUL (!!!), and it
should be renamed to isglobspecial() or something.
dir.c uses isspecial() in two places, and both callers rely on NUL being a
part of special to terminate the loops they are in, like this:
for (;;) {
unsigned char c = *match++;
len++;
if (isspecial(c))
return len;
}
It may be a cunning and cute logic, but I do not particularly like it. It
might be cleaner to rename it to isglobspecial(), drop NUL from it, and
have the two existing call sites explicitly check for (c == NUL)
for loop termination.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/4] Add ctype test
2009-01-13 8:13 ` Junio C Hamano
@ 2009-01-17 15:50 ` René Scharfe
2009-01-17 15:50 ` [PATCH 2/4] Reformat ctype.c René Scharfe
` (2 subsequent siblings)
3 siblings, 0 replies; 12+ messages in thread
From: René Scharfe @ 2009-01-17 15:50 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Alex Riesen, Git Mailing List
Manipulating the character class table in ctype.c by hand is error prone.
To ensure that typos are found quickly, add a test program and script.
test-ctype checks the output of the character class macros isspace() et
al. by applying them on all possible char values and consulting a list of
all characters in the particular class. It doesn't check tolower() and
toupper(); this could be added later.
The test script t0070-fundamental.sh is created because there is no good
place for the ctype test, yet -- except for t0000-basic.sh perhaps, but
it doesn't run well on Windows, yet.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
Makefile | 3 ++
t/t0070-fundamental.sh | 15 +++++++++++
test-ctype.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 84 insertions(+), 0 deletions(-)
create mode 100755 t/t0070-fundamental.sh
create mode 100644 test-ctype.c
diff --git a/Makefile b/Makefile
index 4b1d488..dca61f5 100644
--- a/Makefile
+++ b/Makefile
@@ -1360,6 +1360,7 @@ endif
### Testing rules
TEST_PROGRAMS += test-chmtime$X
+TEST_PROGRAMS += test-ctype$X
TEST_PROGRAMS += test-date$X
TEST_PROGRAMS += test-delta$X
TEST_PROGRAMS += test-genrandom$X
@@ -1379,6 +1380,8 @@ export NO_SVN_TESTS
test: all
$(MAKE) -C t/ all
+test-ctype$X: ctype.o
+
test-date$X: date.o ctype.o
test-delta$X: diff-delta.o patch-delta.o
diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh
new file mode 100755
index 0000000..680d7d6
--- /dev/null
+++ b/t/t0070-fundamental.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+test_description='check that the most basic functions work
+
+
+Verify wrappers and compatibility functions.
+'
+
+. ./test-lib.sh
+
+test_expect_success 'character classes (isspace, isalpha etc.)' '
+ test-ctype
+'
+
+test_done
diff --git a/test-ctype.c b/test-ctype.c
new file mode 100644
index 0000000..723eff4
--- /dev/null
+++ b/test-ctype.c
@@ -0,0 +1,66 @@
+#include "cache.h"
+
+
+static int test_isdigit(int c)
+{
+ return isdigit(c);
+}
+
+static int test_isspace(int c)
+{
+ return isspace(c);
+}
+
+static int test_isalpha(int c)
+{
+ return isalpha(c);
+}
+
+static int test_isalnum(int c)
+{
+ return isalnum(c);
+}
+
+#define DIGIT "0123456789"
+#define LOWER "abcdefghijklmnopqrstuvwxyz"
+#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+
+static const struct ctype_class {
+ const char *name;
+ int (*test_fn)(int);
+ const char *members;
+} classes[] = {
+ { "isdigit", test_isdigit, DIGIT },
+ { "isspace", test_isspace, " \n\r\t" },
+ { "isalpha", test_isalpha, LOWER UPPER },
+ { "isalnum", test_isalnum, LOWER UPPER DIGIT },
+ { NULL }
+};
+
+static int test_class(const struct ctype_class *test)
+{
+ int i, rc = 0;
+
+ for (i = 0; i < 256; i++) {
+ int expected = i ? !!strchr(test->members, i) : 0;
+ int actual = test->test_fn(i);
+
+ if (actual != expected) {
+ rc = 1;
+ printf("%s classifies char %d (0x%02x) wrongly\n",
+ test->name, i, i);
+ }
+ }
+ return rc;
+}
+
+int main(int argc, char **argv)
+{
+ const struct ctype_class *test;
+ int rc = 0;
+
+ for (test = classes; test->name; test++)
+ rc |= test_class(test);
+
+ return rc;
+}
--
1.6.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/4] Reformat ctype.c
2009-01-13 8:13 ` Junio C Hamano
2009-01-17 15:50 ` [PATCH 1/4] Add ctype test René Scharfe
@ 2009-01-17 15:50 ` René Scharfe
2009-01-17 15:50 ` [PATCH 3/4] Change NUL char handling of isspecial() René Scharfe
2009-01-17 15:50 ` [PATCH 4/4] Add is_regex_special() René Scharfe
3 siblings, 0 replies; 12+ messages in thread
From: René Scharfe @ 2009-01-17 15:50 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Alex Riesen, Git Mailing List
Enhance the readability of ctype.c by using an enum instead of macros
to initialize the character class table. This allows the use of a single
letter to mark a char, making the table fit within 80 columns.
Also list the index of the last entry in each row in the following comment.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
ctype.c | 32 ++++++++++++++------------------
1 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/ctype.c b/ctype.c
index 9208d67..6528687 100644
--- a/ctype.c
+++ b/ctype.c
@@ -5,25 +5,21 @@
*/
#include "cache.h"
-/* Just so that no insane platform contaminate namespace with these symbols */
-#undef SS
-#undef AA
-#undef DD
-#undef GS
-
-#define SS GIT_SPACE
-#define AA GIT_ALPHA
-#define DD GIT_DIGIT
-#define GS GIT_SPECIAL /* \0, *, ?, [, \\ */
+enum {
+ S = GIT_SPACE,
+ A = GIT_ALPHA,
+ D = GIT_DIGIT,
+ G = GIT_SPECIAL, /* \0, *, ?, [, \\ */
+};
unsigned char sane_ctype[256] = {
- GS, 0, 0, 0, 0, 0, 0, 0, 0, SS, SS, 0, 0, SS, 0, 0, /* 0-15 */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-15 */
- SS, 0, 0, 0, 0, 0, 0, 0, 0, 0, GS, 0, 0, 0, 0, 0, /* 32-15 */
- DD, DD, DD, DD, DD, DD, DD, DD, DD, DD, 0, 0, 0, 0, 0, GS, /* 48-15 */
- 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 64-15 */
- AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, GS, GS, 0, 0, 0, /* 80-15 */
- 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 96-15 */
- AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, 0, 0, 0, 0, 0, /* 112-15 */
+ G, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
+ S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */
+ D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */
+ 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
+ A, A, A, A, A, A, A, A, A, A, A, G, G, 0, 0, 0, /* 80.. 95 */
+ 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
+ A, A, A, A, A, A, A, A, A, A, A, 0, 0, 0, 0, 0, /* 112..127 */
/* Nothing in the 128.. range */
};
--
1.6.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/4] Change NUL char handling of isspecial()
2009-01-13 8:13 ` Junio C Hamano
2009-01-17 15:50 ` [PATCH 1/4] Add ctype test René Scharfe
2009-01-17 15:50 ` [PATCH 2/4] Reformat ctype.c René Scharfe
@ 2009-01-17 15:50 ` René Scharfe
2009-01-17 15:50 ` [PATCH 4/4] Add is_regex_special() René Scharfe
3 siblings, 0 replies; 12+ messages in thread
From: René Scharfe @ 2009-01-17 15:50 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Alex Riesen, Git Mailing List
Replace isspecial() by the new macro is_glob_special(), which is more,
well, specialized. The former included the NUL char in its character
class, while the latter only includes characters that are special to
file name globbing.
The new name contains underscores because they enhance readability
considerably now that it's made up of three words. Renaming the
function is necessary to document its changed scope.
The call sites of isspecial() are updated to check explicitly for NUL.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
This patch applies to next (plus the previous ones in this series).
ctype.c | 4 ++--
dir.c | 4 ++--
git-compat-util.h | 4 ++--
grep.c | 5 +++--
test-ctype.c | 6 ++++++
5 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/ctype.c b/ctype.c
index 6528687..9de187c 100644
--- a/ctype.c
+++ b/ctype.c
@@ -9,11 +9,11 @@ enum {
S = GIT_SPACE,
A = GIT_ALPHA,
D = GIT_DIGIT,
- G = GIT_SPECIAL, /* \0, *, ?, [, \\ */
+ G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
};
unsigned char sane_ctype[256] = {
- G, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */
D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */
diff --git a/dir.c b/dir.c
index 7c59829..d55a41a 100644
--- a/dir.c
+++ b/dir.c
@@ -75,7 +75,7 @@ static int match_one(const char *match, const char *name, int namelen)
for (;;) {
unsigned char c1 = *match;
unsigned char c2 = *name;
- if (isspecial(c1))
+ if (c1 == '\0' || is_glob_special(c1))
break;
if (c1 != c2)
return 0;
@@ -678,7 +678,7 @@ static int simple_length(const char *match)
for (;;) {
unsigned char c = *match++;
len++;
- if (isspecial(c))
+ if (c == '\0' || is_glob_special(c))
return len;
}
}
diff --git a/git-compat-util.h b/git-compat-util.h
index e20b1e8..7c92588 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -327,13 +327,13 @@ extern unsigned char sane_ctype[256];
#define GIT_SPACE 0x01
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
-#define GIT_SPECIAL 0x08
+#define GIT_GLOB_SPECIAL 0x08
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
-#define isspecial(x) sane_istest(x,GIT_SPECIAL)
+#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
diff --git a/grep.c b/grep.c
index 6485760..f9a4525 100644
--- a/grep.c
+++ b/grep.c
@@ -30,8 +30,9 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat,
static int isregexspecial(int c)
{
- return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' ||
- c == '.' || c == '^' || c == '{' || c == '|';
+ return c == '\0' || is_glob_special(c) ||
+ c == '$' || c == '(' || c == ')' || c == '+' ||
+ c == '.' || c == '^' || c == '{' || c == '|';
}
static int is_fixed(const char *s)
diff --git a/test-ctype.c b/test-ctype.c
index 723eff4..d6425d5 100644
--- a/test-ctype.c
+++ b/test-ctype.c
@@ -21,6 +21,11 @@ static int test_isalnum(int c)
return isalnum(c);
}
+static int test_is_glob_special(int c)
+{
+ return is_glob_special(c);
+}
+
#define DIGIT "0123456789"
#define LOWER "abcdefghijklmnopqrstuvwxyz"
#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
@@ -34,6 +39,7 @@ static const struct ctype_class {
{ "isspace", test_isspace, " \n\r\t" },
{ "isalpha", test_isalpha, LOWER UPPER },
{ "isalnum", test_isalnum, LOWER UPPER DIGIT },
+ { "is_glob_special", test_is_glob_special, "*?[\\" },
{ NULL }
};
--
1.6.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 4/4] Add is_regex_special()
2009-01-13 8:13 ` Junio C Hamano
` (2 preceding siblings ...)
2009-01-17 15:50 ` [PATCH 3/4] Change NUL char handling of isspecial() René Scharfe
@ 2009-01-17 15:50 ` René Scharfe
3 siblings, 0 replies; 12+ messages in thread
From: René Scharfe @ 2009-01-17 15:50 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Alex Riesen, Git Mailing List
Add is_regex_special(), a character class macro for chars that have a
special meaning in regular expressions.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
This patch applies to next (plus the previous ones in this series).
ctype.c | 7 ++++---
git-compat-util.h | 2 ++
grep.c | 9 +--------
test-ctype.c | 6 ++++++
4 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/ctype.c b/ctype.c
index 9de187c..b90ec00 100644
--- a/ctype.c
+++ b/ctype.c
@@ -10,16 +10,17 @@ enum {
A = GIT_ALPHA,
D = GIT_DIGIT,
G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
+ R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */
};
unsigned char sane_ctype[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
- S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */
+ S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */
D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */
0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
- A, A, A, A, A, A, A, A, A, A, A, G, G, 0, 0, 0, /* 80.. 95 */
+ A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */
0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
- A, A, A, A, A, A, A, A, A, A, A, 0, 0, 0, 0, 0, /* 112..127 */
+ A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */
/* Nothing in the 128.. range */
};
diff --git a/git-compat-util.h b/git-compat-util.h
index 7c92588..079cbe9 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -328,12 +328,14 @@ extern unsigned char sane_ctype[256];
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_GLOB_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
+#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
diff --git a/grep.c b/grep.c
index f9a4525..062b2b6 100644
--- a/grep.c
+++ b/grep.c
@@ -28,16 +28,9 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat,
p->next = NULL;
}
-static int isregexspecial(int c)
-{
- return c == '\0' || is_glob_special(c) ||
- c == '$' || c == '(' || c == ')' || c == '+' ||
- c == '.' || c == '^' || c == '{' || c == '|';
-}
-
static int is_fixed(const char *s)
{
- while (!isregexspecial(*s))
+ while (*s && !is_regex_special(*s))
s++;
return !*s;
}
diff --git a/test-ctype.c b/test-ctype.c
index d6425d5..033c749 100644
--- a/test-ctype.c
+++ b/test-ctype.c
@@ -26,6 +26,11 @@ static int test_is_glob_special(int c)
return is_glob_special(c);
}
+static int test_is_regex_special(int c)
+{
+ return is_regex_special(c);
+}
+
#define DIGIT "0123456789"
#define LOWER "abcdefghijklmnopqrstuvwxyz"
#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
@@ -40,6 +45,7 @@ static const struct ctype_class {
{ "isalpha", test_isalpha, LOWER UPPER },
{ "isalnum", test_isalnum, LOWER UPPER DIGIT },
{ "is_glob_special", test_is_glob_special, "*?[\\" },
+ { "is_regex_special", test_is_regex_special, "$()*+.?[\\^{|" },
{ NULL }
};
--
1.6.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2009-01-17 15:52 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-01-09 23:08 [PATCH 1/2] grep -w: forward to next possible position after rejected match René Scharfe
2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe
2009-01-10 20:37 ` Junio C Hamano
2009-01-12 12:25 ` Mikael Magnusson
2009-01-12 13:33 ` Johannes Schindelin
2009-01-12 15:32 ` Alex Riesen
2009-01-12 19:18 ` René Scharfe
2009-01-13 8:13 ` Junio C Hamano
2009-01-17 15:50 ` [PATCH 1/4] Add ctype test René Scharfe
2009-01-17 15:50 ` [PATCH 2/4] Reformat ctype.c René Scharfe
2009-01-17 15:50 ` [PATCH 3/4] Change NUL char handling of isspecial() René Scharfe
2009-01-17 15:50 ` [PATCH 4/4] Add is_regex_special() René Scharfe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).