* [PATCH 1/2] grep -w: forward to next possible position after rejected match @ 2009-01-09 23:08 René Scharfe 2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe 0 siblings, 1 reply; 12+ messages in thread From: René Scharfe @ 2009-01-09 23:08 UTC (permalink / raw) To: Git Mailing List; +Cc: Junio C Hamano grep -w accepts matches between non-word characters, only. If a match from regexec() doesn't meet these criteria, grep continues its search after the first character of that match. We can be a bit smarter here and skip all positions that follow a word character first, as they can't match our criteria. This way we can consume characters quite cheaply and don't need to special-case the handling of the beginning of a line. Here's a contrived example command on msysgit (best of five runs): $ time git grep -w ...... v1.6.1 >/dev/null real 0m1.611s user 0m0.000s sys 0m0.015s With the patch it's quite a bit faster: $ time git grep -w ...... v1.6.1 >/dev/null real 0m1.179s user 0m0.000s sys 0m0.015s More common search patterns will gain a lot less, but it's a nice clean up anyway. Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> --- grep.c | 11 +++++++---- 1 files changed, 7 insertions(+), 4 deletions(-) diff --git a/grep.c b/grep.c index 49e9319..394703b 100644 --- a/grep.c +++ b/grep.c @@ -294,7 +294,6 @@ static struct { static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx) { int hit = 0; - int at_true_bol = 1; int saved_ch = 0; regmatch_t pmatch[10]; @@ -337,7 +336,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol * either end of the line, or at word boundary * (i.e. the next char must not be a word char). 
*/ - if ( ((pmatch[0].rm_so == 0 && at_true_bol) || + if ( ((pmatch[0].rm_so == 0) || !word_char(bol[pmatch[0].rm_so-1])) && ((pmatch[0].rm_eo == (eol-bol)) || !word_char(bol[pmatch[0].rm_eo])) ) @@ -349,10 +348,14 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol /* There could be more than one match on the * line, and the first match might not be * strict word match. But later ones could be! + * Forward to the next possible start, i.e. the + * next position following a non-word char. */ bol = pmatch[0].rm_so + bol + 1; - at_true_bol = 0; - goto again; + while (word_char(bol[-1]) && bol < eol) + bol++; + if (bol < eol) + goto again; } } if (p->token == GREP_PATTERN_HEAD && saved_ch) -- 1.6.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/2] grep: don't call regexec() for fixed strings 2009-01-09 23:08 [PATCH 1/2] grep -w: forward to next possible position after rejected match René Scharfe @ 2009-01-09 23:18 ` René Scharfe 2009-01-10 20:37 ` Junio C Hamano ` (2 more replies) 0 siblings, 3 replies; 12+ messages in thread From: René Scharfe @ 2009-01-09 23:18 UTC (permalink / raw) To: Git Mailing List; +Cc: Junio C Hamano Add the new flag "fixed" to struct grep_pat and set it if the pattern doesn't contain any regex control characters, in addition to when the flag -F/--fixed-strings was specified. This gives a nice speed up on msysgit, where regexec() seems to be extra slow. Before (best of five runs): $ time git grep grep v1.6.1 >/dev/null real 0m0.552s user 0m0.000s sys 0m0.000s $ time git grep -F grep v1.6.1 >/dev/null real 0m0.170s user 0m0.000s sys 0m0.015s With the patch: $ time git grep grep v1.6.1 >/dev/null real 0m0.173s user 0m0.000s sys 0m0.000s The difference is much smaller on Linux, but still measurable. Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> --- grep.c | 29 +++++++++++++++++++++++++---- grep.h | 1 + 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/grep.c b/grep.c index 394703b..a1092df 100644 --- a/grep.c +++ b/grep.c @@ -28,9 +28,31 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, p->next = NULL; } +static int isregexspecial(int c) +{ + return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' || + c == '.' 
|| c == '^' || c == '{' || c == '|'; +} + +static int is_fixed(const char *s) +{ + while (!isregexspecial(*s)) + s++; + return !*s; +} + static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { - int err = regcomp(&p->regexp, p->pattern, opt->regflags); + int err; + + if (opt->fixed || is_fixed(p->pattern)) + p->fixed = 1; + if (opt->regflags & REG_ICASE) + p->fixed = 0; + if (p->fixed) + return; + + err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; char where[1024]; @@ -159,8 +181,7 @@ void compile_grep_patterns(struct grep_opt *opt) case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - if (!opt->fixed) - compile_regexp(p, opt); + compile_regexp(p, opt); break; default: opt->extended = 1; @@ -314,7 +335,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol } again: - if (!opt->fixed) { + if (!p->fixed) { regex_t *exp = &p->regexp; hit = !regexec(exp, bol, ARRAY_SIZE(pmatch), pmatch, 0); diff --git a/grep.h b/grep.h index 45a222d..5102ce3 100644 --- a/grep.h +++ b/grep.h @@ -30,6 +30,7 @@ struct grep_pat { const char *pattern; enum grep_header_field field; regex_t regexp; + unsigned fixed:1; }; enum grep_expr_node { -- 1.6.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings 2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe @ 2009-01-10 20:37 ` Junio C Hamano 2009-01-12 12:25 ` Mikael Magnusson 2009-01-12 15:32 ` Alex Riesen 2 siblings, 0 replies; 12+ messages in thread From: Junio C Hamano @ 2009-01-10 20:37 UTC (permalink / raw) To: René Scharfe; +Cc: Git Mailing List René Scharfe <rene.scharfe@lsrfire.ath.cx> writes: > Add the new flag "fixed" to struct grep_pat and set it if the pattern > is doesn't contain any regex control characters in addition to if the > flag -F/--fixed-strings was specified. > > This gives a nice speed up on msysgit, where regexec() seems to be > extra slow. Before (best of five runs): Thanks, and... > static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) > { > - int err = regcomp(&p->regexp, p->pattern, opt->regflags); > + int err; > + > + if (opt->fixed || is_fixed(p->pattern)) > + p->fixed = 1; > + if (opt->regflags & REG_ICASE) > + p->fixed = 0; ... thanks again for being extra careful. That's why I *love* your patches. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings 2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe 2009-01-10 20:37 ` Junio C Hamano @ 2009-01-12 12:25 ` Mikael Magnusson 2009-01-12 13:33 ` Johannes Schindelin 2009-01-12 15:32 ` Alex Riesen 2 siblings, 1 reply; 12+ messages in thread From: Mikael Magnusson @ 2009-01-12 12:25 UTC (permalink / raw) To: René Scharfe; +Cc: Git Mailing List, Junio C Hamano 2009/1/10 René Scharfe <rene.scharfe@lsrfire.ath.cx>: > Add the new flag "fixed" to struct grep_pat and set it if the pattern > is doesn't contain any regex control characters in addition to if the > flag -F/--fixed-strings was specified. > > diff --git a/grep.c b/grep.c > index 394703b..a1092df 100644 > --- a/grep.c > +++ b/grep.c > @@ -28,9 +28,31 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, > p->next = NULL; > } > > +static int isregexspecial(int c) > +{ > + return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' || > + c == '.' || c == '^' || c == '{' || c == '|'; > +} Shouldn't this include '*' and '\'? -- Mikael Magnusson ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings 2009-01-12 12:25 ` Mikael Magnusson @ 2009-01-12 13:33 ` Johannes Schindelin 0 siblings, 0 replies; 12+ messages in thread From: Johannes Schindelin @ 2009-01-12 13:33 UTC (permalink / raw) To: Mikael Magnusson; +Cc: René Scharfe, Git Mailing List, Junio C Hamano [-- Attachment #1: Type: TEXT/PLAIN, Size: 844 bytes --] Hi, On Mon, 12 Jan 2009, Mikael Magnusson wrote: > 2009/1/10 René Scharfe <rene.scharfe@lsrfire.ath.cx>: > > Add the new flag "fixed" to struct grep_pat and set it if the pattern > > is doesn't contain any regex control characters in addition to if the > > flag -F/--fixed-strings was specified. > > > > diff --git a/grep.c b/grep.c > > index 394703b..a1092df 100644 > > --- a/grep.c > > +++ b/grep.c > > @@ -28,9 +28,31 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, > > p->next = NULL; > > } > > > > +static int isregexspecial(int c) > > +{ > > + return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' || > > + c == '.' || c == '^' || c == '{' || c == '|'; > > +} > > Shouldn't this include '*' and '\'? This is covered by isspecial(): see ctype.c. Hth, Dscho ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings 2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe 2009-01-10 20:37 ` Junio C Hamano 2009-01-12 12:25 ` Mikael Magnusson @ 2009-01-12 15:32 ` Alex Riesen 2009-01-12 19:18 ` René Scharfe 2 siblings, 1 reply; 12+ messages in thread From: Alex Riesen @ 2009-01-12 15:32 UTC (permalink / raw) To: René Scharfe; +Cc: Git Mailing List, Junio C Hamano 2009/1/10 René Scharfe <rene.scharfe@lsrfire.ath.cx>: > +static int isregexspecial(int c) > +{ > + return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' || > + c == '.' || c == '^' || c == '{' || c == '|'; > +} > + > +static int is_fixed(const char *s) > +{ > + while (!isregexspecial(*s)) > + s++; > + return !*s; > +} strchr? ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings 2009-01-12 15:32 ` Alex Riesen @ 2009-01-12 19:18 ` René Scharfe 2009-01-13 8:13 ` Junio C Hamano 0 siblings, 1 reply; 12+ messages in thread From: René Scharfe @ 2009-01-12 19:18 UTC (permalink / raw) To: Alex Riesen; +Cc: Git Mailing List, Junio C Hamano Alex Riesen schrieb: > 2009/1/10 René Scharfe <rene.scharfe@lsrfire.ath.cx>: >> +static int isregexspecial(int c) >> +{ >> + return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' || >> + c == '.' || c == '^' || c == '{' || c == '|'; >> +} >> + >> +static int is_fixed(const char *s) >> +{ >> + while (!isregexspecial(*s)) >> + s++; >> + return !*s; >> +} > > strchr? Oh, yes, that would look nicer. Another option is to extend ctype.c and implement isregexspecial() -- and while we're at it islowerxdigit() (builtin-name-rev.c::ishex()) and iswordchar() (config.c::iskeychar(), grep.c::word_char()), too -- as table lookups. I.e., something like the following (untested). Which of the mentioned functions are really worth of this promotion? The isregexspecial() char class has more members than isspecial(), but it's not performance critical (unless you have a lot of patterns and only a small amount of data to grep :). Are there more candidates for ctype-ification? 
René ctype.c | 14 ++++++++++---- git-compat-util.h | 6 ++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ctype.c b/ctype.c index 9208d67..1a76586 100644 --- a/ctype.c +++ b/ctype.c @@ -10,20 +10,26 @@ #undef AA #undef DD #undef GS +#undef RR +#undef US +#undef Ah #define SS GIT_SPACE #define AA GIT_ALPHA #define DD GIT_DIGIT #define GS GIT_SPECIAL /* \0, *, ?, [, \\ */ +#define RR GIT_REGEX_SPECIAL /* $, (, ), +, ., ^, {, | */ +#define US GIT_UNDERSCORE +#define Ah (GIT_ALPHA | GIT_LOWER_XDIGIT) unsigned char sane_ctype[256] = { GS, 0, 0, 0, 0, 0, 0, 0, 0, SS, SS, 0, 0, SS, 0, 0, /* 0-15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-15 */ - SS, 0, 0, 0, 0, 0, 0, 0, 0, 0, GS, 0, 0, 0, 0, 0, /* 32-15 */ + SS, 0, 0, 0, RR, 0, 0, 0, RR, RR, GS, RR, 0, 0, RR, 0, /* 32-15 */ DD, DD, DD, DD, DD, DD, DD, DD, DD, DD, 0, 0, 0, 0, 0, GS, /* 48-15 */ 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 64-15 */ - AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, GS, GS, 0, 0, 0, /* 80-15 */ - 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 96-15 */ - AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, 0, 0, 0, 0, 0, /* 112-15 */ + AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, GS, GS, 0, RR, US, /* 80-15 */ + 0, Ah, Ah, Ah, Ah, Ah, Ah, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 96-15 */ + AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, RR, RR, 0, 0, 0, /* 112-15 */ /* Nothing in the 128.. 
range */ }; diff --git a/git-compat-util.h b/git-compat-util.h index e20b1e8..5eaa662 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -328,12 +328,18 @@ extern unsigned char sane_ctype[256]; #define GIT_DIGIT 0x02 #define GIT_ALPHA 0x04 #define GIT_SPECIAL 0x08 +#define GIT_REGEX_SPECIAL 0x10 +#define GIT_UNDERSCORE 0x20 +#define GIT_LOWER_XDIGIT 0x40 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) #define isspace(x) sane_istest(x,GIT_SPACE) #define isdigit(x) sane_istest(x,GIT_DIGIT) #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) #define isspecial(x) sane_istest(x,GIT_SPECIAL) +#define isregexspecial(x) sane_istest(x,GIT_SPECIAL | GIT_REGEX_SPECIAL) +#define iswordchar(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT | GIT_UNDERSCORE) +#define islowerxdigit(x) sane_istest(x,GIT_DIGIT | GIT_LOWER_XDIGIT) #define tolower(x) sane_case((unsigned char)(x), 0x20) #define toupper(x) sane_case((unsigned char)(x), 0) ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 2/2] grep: don't call regexec() for fixed strings 2009-01-12 19:18 ` René Scharfe @ 2009-01-13 8:13 ` Junio C Hamano 2009-01-17 15:50 ` [PATCH 1/4] Add ctype test René Scharfe ` (3 more replies) 0 siblings, 4 replies; 12+ messages in thread From: Junio C Hamano @ 2009-01-13 8:13 UTC (permalink / raw) To: René Scharfe; +Cc: Alex Riesen, Git Mailing List René Scharfe <rene.scharfe@lsrfire.ath.cx> writes: > +#define RR GIT_REGEX_SPECIAL /* $, (, ), +, ., ^, {, | */ > +#define US GIT_UNDERSCORE > +#define Ah (GIT_ALPHA | GIT_LOWER_XDIGIT) > > unsigned char sane_ctype[256] = { > GS, 0, 0, 0, 0, 0, 0, 0, 0, SS, SS, 0, 0, SS, 0, 0, /* 0-15 */ Mental note. NUL is marked as GIT_SPECIAL. > #define isspecial(x) sane_istest(x,GIT_SPECIAL) > +#define isregexspecial(x) sane_istest(x,GIT_SPECIAL | GIT_REGEX_SPECIAL) Perhaps isspecial() is misnamed if we were to enhance git-ctype in this way. It is about a byte being shell glob pattern or a NUL (!!!), and it should be renamed to isglobspecial() or something. dir.c uses isspecial() in two places, and both callers rely on NUL being a part of special to terminate the loops they are in, like this: for (;;) { unsigned char c = *match++; len++; if (isspecial(c)) return len; } It may be a cunning and cute logic, but I do not particularly like it. It might be cleaner to rename it to isglobspecial(), drop NUL from it, and have these two call existing call sites to explicitly check for (c == NUL) for loop termination. ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/4] Add ctype test 2009-01-13 8:13 ` Junio C Hamano @ 2009-01-17 15:50 ` René Scharfe 2009-01-17 15:50 ` [PATCH 2/4] Reformat ctype.c René Scharfe ` (2 subsequent siblings) 3 siblings, 0 replies; 12+ messages in thread From: René Scharfe @ 2009-01-17 15:50 UTC (permalink / raw) To: Junio C Hamano; +Cc: Alex Riesen, Git Mailing List Manipulating the character class table in ctype.c by hand is error prone. To ensure that typos are found quickly, add a test program and script. test-ctype checks the output of the character class macros isspace() et. al. by applying them on all possible char values and consulting a list of all characters in the particular class. It doesn't check tolower() and toupper(); this could be added later. The test script t0070-fundamental.sh is created because there is no good place for the ctype test, yet -- except for t0000-basic.sh perhaps, but it doesn't run well on Windows, yet. Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> --- Makefile | 3 ++ t/t0070-fundamental.sh | 15 +++++++++++ test-ctype.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 0 deletions(-) create mode 100755 t/t0070-fundamental.sh create mode 100644 test-ctype.c diff --git a/Makefile b/Makefile index 4b1d488..dca61f5 100644 --- a/Makefile +++ b/Makefile @@ -1360,6 +1360,7 @@ endif ### Testing rules TEST_PROGRAMS += test-chmtime$X +TEST_PROGRAMS += test-ctype$X TEST_PROGRAMS += test-date$X TEST_PROGRAMS += test-delta$X TEST_PROGRAMS += test-genrandom$X @@ -1379,6 +1380,8 @@ export NO_SVN_TESTS test: all $(MAKE) -C t/ all +test-ctype$X: ctype.o + test-date$X: date.o ctype.o test-delta$X: diff-delta.o patch-delta.o diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh new file mode 100755 index 0000000..680d7d6 --- /dev/null +++ b/t/t0070-fundamental.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +test_description='check that the most basic functions work + + +Verify wrappers and compatibility functions. +' + +. 
./test-lib.sh + +test_expect_success 'character classes (isspace, isalpha etc.)' ' + test-ctype +' + +test_done diff --git a/test-ctype.c b/test-ctype.c new file mode 100644 index 0000000..723eff4 --- /dev/null +++ b/test-ctype.c @@ -0,0 +1,66 @@ +#include "cache.h" + + +static int test_isdigit(int c) +{ + return isdigit(c); +} + +static int test_isspace(int c) +{ + return isspace(c); +} + +static int test_isalpha(int c) +{ + return isalpha(c); +} + +static int test_isalnum(int c) +{ + return isalnum(c); +} + +#define DIGIT "0123456789" +#define LOWER "abcdefghijklmnopqrstuvwxyz" +#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + +static const struct ctype_class { + const char *name; + int (*test_fn)(int); + const char *members; +} classes[] = { + { "isdigit", test_isdigit, DIGIT }, + { "isspace", test_isspace, " \n\r\t" }, + { "isalpha", test_isalpha, LOWER UPPER }, + { "isalnum", test_isalnum, LOWER UPPER DIGIT }, + { NULL } +}; + +static int test_class(const struct ctype_class *test) +{ + int i, rc = 0; + + for (i = 0; i < 256; i++) { + int expected = i ? !!strchr(test->members, i) : 0; + int actual = test->test_fn(i); + + if (actual != expected) { + rc = 1; + printf("%s classifies char %d (0x%02x) wrongly\n", + test->name, i, i); + } + } + return rc; +} + +int main(int argc, char **argv) +{ + const struct ctype_class *test; + int rc = 0; + + for (test = classes; test->name; test++) + rc |= test_class(test); + + return rc; +} -- 1.6.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/4] Reformat ctype.c 2009-01-13 8:13 ` Junio C Hamano 2009-01-17 15:50 ` [PATCH 1/4] Add ctype test René Scharfe @ 2009-01-17 15:50 ` René Scharfe 2009-01-17 15:50 ` [PATCH 3/4] Change NUL char handling of isspecial() René Scharfe 2009-01-17 15:50 ` [PATCH 4/4] Add is_regex_special() René Scharfe 3 siblings, 0 replies; 12+ messages in thread From: René Scharfe @ 2009-01-17 15:50 UTC (permalink / raw) To: Junio C Hamano; +Cc: Alex Riesen, Git Mailing List Enhance the readability of ctype.c by using an enum instead of macros to initialize the character class table. This allows the use of a single letter to mark a char, making the table fit within 80 columns. Also list the index of the last entry in each row in the following comment. Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> --- ctype.c | 32 ++++++++++++++------------------ 1 files changed, 14 insertions(+), 18 deletions(-) diff --git a/ctype.c b/ctype.c index 9208d67..6528687 100644 --- a/ctype.c +++ b/ctype.c @@ -5,25 +5,21 @@ */ #include "cache.h" -/* Just so that no insane platform contaminate namespace with these symbols */ -#undef SS -#undef AA -#undef DD -#undef GS - -#define SS GIT_SPACE -#define AA GIT_ALPHA -#define DD GIT_DIGIT -#define GS GIT_SPECIAL /* \0, *, ?, [, \\ */ +enum { + S = GIT_SPACE, + A = GIT_ALPHA, + D = GIT_DIGIT, + G = GIT_SPECIAL, /* \0, *, ?, [, \\ */ +}; unsigned char sane_ctype[256] = { - GS, 0, 0, 0, 0, 0, 0, 0, 0, SS, SS, 0, 0, SS, 0, 0, /* 0-15 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-15 */ - SS, 0, 0, 0, 0, 0, 0, 0, 0, 0, GS, 0, 0, 0, 0, 0, /* 32-15 */ - DD, DD, DD, DD, DD, DD, DD, DD, DD, DD, 0, 0, 0, 0, 0, GS, /* 48-15 */ - 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 64-15 */ - AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, GS, GS, 0, 0, 0, /* 80-15 */ - 0, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, /* 96-15 */ - AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, AA, 0, 0, 0, 0, 0, /* 112-15 */ + G, 0, 0, 0, 0, 
0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ + S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */ + D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ + 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ + A, A, A, A, A, A, A, A, A, A, A, G, G, 0, 0, 0, /* 80.. 95 */ + 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ + A, A, A, A, A, A, A, A, A, A, A, 0, 0, 0, 0, 0, /* 112..127 */ /* Nothing in the 128.. range */ }; -- 1.6.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/4] Change NUL char handling of isspecial() 2009-01-13 8:13 ` Junio C Hamano 2009-01-17 15:50 ` [PATCH 1/4] Add ctype test René Scharfe 2009-01-17 15:50 ` [PATCH 2/4] Reformat ctype.c René Scharfe @ 2009-01-17 15:50 ` René Scharfe 2009-01-17 15:50 ` [PATCH 4/4] Add is_regex_special() René Scharfe 3 siblings, 0 replies; 12+ messages in thread From: René Scharfe @ 2009-01-17 15:50 UTC (permalink / raw) To: Junio C Hamano; +Cc: Alex Riesen, Git Mailing List Replace isspecial() by the new macro is_glob_special(), which is more, well, specialized. The former included the NUL char in its character class, while the latter only includes characters that are special to file name globbing. The new name contains underscores because they enhance readability considerably now that it's made up of three words. Renaming the function is necessary to document its changed scope. The call sites of isspecial() are updated to check explicitly for NUL. Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> --- This patch applies to next (plus the previous ones in this series). ctype.c | 4 ++-- dir.c | 4 ++-- git-compat-util.h | 4 ++-- grep.c | 5 +++-- test-ctype.c | 6 ++++++ 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/ctype.c b/ctype.c index 6528687..9de187c 100644 --- a/ctype.c +++ b/ctype.c @@ -9,11 +9,11 @@ enum { S = GIT_SPACE, A = GIT_ALPHA, D = GIT_DIGIT, - G = GIT_SPECIAL, /* \0, *, ?, [, \\ */ + G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ }; unsigned char sane_ctype[256] = { - G, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */ D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 
63 */ diff --git a/dir.c b/dir.c index 7c59829..d55a41a 100644 --- a/dir.c +++ b/dir.c @@ -75,7 +75,7 @@ static int match_one(const char *match, const char *name, int namelen) for (;;) { unsigned char c1 = *match; unsigned char c2 = *name; - if (isspecial(c1)) + if (c1 == '\0' || is_glob_special(c1)) break; if (c1 != c2) return 0; @@ -678,7 +678,7 @@ static int simple_length(const char *match) for (;;) { unsigned char c = *match++; len++; - if (isspecial(c)) + if (c == '\0' || is_glob_special(c)) return len; } } diff --git a/git-compat-util.h b/git-compat-util.h index e20b1e8..7c92588 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -327,13 +327,13 @@ extern unsigned char sane_ctype[256]; #define GIT_SPACE 0x01 #define GIT_DIGIT 0x02 #define GIT_ALPHA 0x04 -#define GIT_SPECIAL 0x08 +#define GIT_GLOB_SPECIAL 0x08 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) #define isspace(x) sane_istest(x,GIT_SPACE) #define isdigit(x) sane_istest(x,GIT_DIGIT) #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define isspecial(x) sane_istest(x,GIT_SPECIAL) +#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) #define tolower(x) sane_case((unsigned char)(x), 0x20) #define toupper(x) sane_case((unsigned char)(x), 0) diff --git a/grep.c b/grep.c index 6485760..f9a4525 100644 --- a/grep.c +++ b/grep.c @@ -30,8 +30,9 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, static int isregexspecial(int c) { - return isspecial(c) || c == '$' || c == '(' || c == ')' || c == '+' || - c == '.' || c == '^' || c == '{' || c == '|'; + return c == '\0' || is_glob_special(c) || + c == '$' || c == '(' || c == ')' || c == '+' || + c == '.' 
|| c == '^' || c == '{' || c == '|'; } static int is_fixed(const char *s) diff --git a/test-ctype.c b/test-ctype.c index 723eff4..d6425d5 100644 --- a/test-ctype.c +++ b/test-ctype.c @@ -21,6 +21,11 @@ static int test_isalnum(int c) return isalnum(c); } +static int test_is_glob_special(int c) +{ + return is_glob_special(c); +} + #define DIGIT "0123456789" #define LOWER "abcdefghijklmnopqrstuvwxyz" #define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ" @@ -34,6 +39,7 @@ static const struct ctype_class { { "isspace", test_isspace, " \n\r\t" }, { "isalpha", test_isalpha, LOWER UPPER }, { "isalnum", test_isalnum, LOWER UPPER DIGIT }, + { "is_glob_special", test_is_glob_special, "*?[\\" }, { NULL } }; -- 1.6.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 4/4] Add is_regex_special() 2009-01-13 8:13 ` Junio C Hamano ` (2 preceding siblings ...) 2009-01-17 15:50 ` [PATCH 3/4] Change NUL char handling of isspecial() René Scharfe @ 2009-01-17 15:50 ` René Scharfe 3 siblings, 0 replies; 12+ messages in thread From: René Scharfe @ 2009-01-17 15:50 UTC (permalink / raw) To: Junio C Hamano; +Cc: Alex Riesen, Git Mailing List Add is_regex_special(), a character class macro for chars that have a special meaning in regular expressions. Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> --- This patch applies to next (plus the previous ones in this series). ctype.c | 7 ++++--- git-compat-util.h | 2 ++ grep.c | 9 +-------- test-ctype.c | 6 ++++++ 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/ctype.c b/ctype.c index 9de187c..b90ec00 100644 --- a/ctype.c +++ b/ctype.c @@ -10,16 +10,17 @@ enum { A = GIT_ALPHA, D = GIT_DIGIT, G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ + R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ }; unsigned char sane_ctype[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ - S, 0, 0, 0, 0, 0, 0, 0, 0, 0, G, 0, 0, 0, 0, 0, /* 32.. 47 */ + S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ - A, A, A, A, A, A, A, A, A, A, A, G, G, 0, 0, 0, /* 80.. 95 */ + A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ - A, A, A, A, A, A, A, A, A, A, A, 0, 0, 0, 0, 0, /* 112..127 */ + A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ /* Nothing in the 128.. 
range */ }; diff --git a/git-compat-util.h b/git-compat-util.h index 7c92588..079cbe9 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -328,12 +328,14 @@ extern unsigned char sane_ctype[256]; #define GIT_DIGIT 0x02 #define GIT_ALPHA 0x04 #define GIT_GLOB_SPECIAL 0x08 +#define GIT_REGEX_SPECIAL 0x10 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) #define isspace(x) sane_istest(x,GIT_SPACE) #define isdigit(x) sane_istest(x,GIT_DIGIT) #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) +#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) #define tolower(x) sane_case((unsigned char)(x), 0x20) #define toupper(x) sane_case((unsigned char)(x), 0) diff --git a/grep.c b/grep.c index f9a4525..062b2b6 100644 --- a/grep.c +++ b/grep.c @@ -28,16 +28,9 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, p->next = NULL; } -static int isregexspecial(int c) -{ - return c == '\0' || is_glob_special(c) || - c == '$' || c == '(' || c == ')' || c == '+' || - c == '.' 
|| c == '^' || c == '{' || c == '|'; -} - static int is_fixed(const char *s) { - while (!isregexspecial(*s)) + while (*s && !is_regex_special(*s)) s++; return !*s; } diff --git a/test-ctype.c b/test-ctype.c index d6425d5..033c749 100644 --- a/test-ctype.c +++ b/test-ctype.c @@ -26,6 +26,11 @@ static int test_is_glob_special(int c) return is_glob_special(c); } +static int test_is_regex_special(int c) +{ + return is_regex_special(c); +} + #define DIGIT "0123456789" #define LOWER "abcdefghijklmnopqrstuvwxyz" #define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ" @@ -40,6 +45,7 @@ static const struct ctype_class { { "isalpha", test_isalpha, LOWER UPPER }, { "isalnum", test_isalnum, LOWER UPPER DIGIT }, { "is_glob_special", test_is_glob_special, "*?[\\" }, + { "is_regex_special", test_is_regex_special, "$()*+.?[\\^{|" }, { NULL } }; -- 1.6.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2009-01-17 15:52 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2009-01-09 23:08 [PATCH 1/2] grep -w: forward to next possible position after rejected match René Scharfe 2009-01-09 23:18 ` [PATCH 2/2] grep: don't call regexec() for fixed strings René Scharfe 2009-01-10 20:37 ` Junio C Hamano 2009-01-12 12:25 ` Mikael Magnusson 2009-01-12 13:33 ` Johannes Schindelin 2009-01-12 15:32 ` Alex Riesen 2009-01-12 19:18 ` René Scharfe 2009-01-13 8:13 ` Junio C Hamano 2009-01-17 15:50 ` [PATCH 1/4] Add ctype test René Scharfe 2009-01-17 15:50 ` [PATCH 2/4] Reformat ctype.c René Scharfe 2009-01-17 15:50 ` [PATCH 3/4] Change NUL char handling of isspecial() René Scharfe 2009-01-17 15:50 ` [PATCH 4/4] Add is_regex_special() René Scharfe
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).