From: Fredrik Kuivinen <frekui@gmail.com>
To: git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>
Subject: [PATCH 5/5] Use kwset in grep
Date: Sat, 13 Feb 2010 15:21:10 +0100 [thread overview]
Message-ID: <20100213142110.GF9543@fredrik-laptop> (raw)
In-Reply-To: <20100213141558.22851.13660.stgit@fredrik-laptop>
Timings, best of five runs in the Linux kernel repository:
before:
$ time git grep qwerty
drivers/char/keyboard.c: "qwertyuiop[]\r\000as" /* 0x10 - 0x1f */
real 0m1.065s
user 0m1.400s
sys 0m0.536s
after:
$ time git grep qwerty
drivers/char/keyboard.c: "qwertyuiop[]\r\000as" /* 0x10 - 0x1f */
real 0m0.621s
user 0m0.560s
sys 0m0.564s
So we gain about 40% in wall-clock time (1.065s -> 0.621s) by using the kwset code.
Signed-off-by: Fredrik Kuivinen <frekui@gmail.com>
---
grep.c | 61 +++++++++++++++++++++++++++++++++++++++++--------------------
grep.h | 2 ++
2 files changed, 43 insertions(+), 20 deletions(-)
diff --git a/grep.c b/grep.c
index a0864f1..deb5f71 100644
--- a/grep.c
+++ b/grep.c
@@ -51,16 +51,38 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
return ret;
}
+static int is_fixed(const char *s)
+{
+ while (*s && !is_regex_special(*s))
+ s++;
+ return !*s;
+}
+
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int err;
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
- p->fixed = opt->fixed;
-
- if (p->fixed)
+ p->fixed = 0;
+
+ if (opt->fixed || is_fixed(p->pattern))
+ p->fixed = 1;
+
+ if (p->fixed) {
+ if (opt->regflags & REG_ICASE || p->ignore_case) {
+ static char trans[256];
+ int i;
+ for (i = 0; i < 256; i++)
+ trans[i] = tolower(i);
+ p->kws = kwsalloc(trans);
+ } else {
+ p->kws = kwsalloc(NULL);
+ }
+ kwsincr(p->kws, p->pattern, strlen(p->pattern));
+ kwsprep(p->kws);
return;
+ }
err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) {
@@ -241,7 +263,10 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
- regfree(&p->regexp);
+ if (p->fixed)
+ kwsfree(p->kws);
+ else
+ regfree(&p->regexp);
break;
default:
break;
@@ -277,21 +302,17 @@ static void show_name(struct grep_opt *opt, const char *name)
}
-static int fixmatch(const char *pattern, char *line, int ignore_case, regmatch_t *match)
+static int fixmatch(const kwset_t kws, char *line, size_t sz,
+ regmatch_t *match)
{
- char *hit;
- if (ignore_case)
- hit = strcasestr(line, pattern);
- else
- hit = strstr(line, pattern);
-
- if (!hit) {
+ struct kwsmatch kwsm;
+ size_t offset = kwsexec(kws, line, sz, &kwsm);
+ if (offset == -1) {
match->rm_so = match->rm_eo = -1;
return REG_NOMATCH;
- }
- else {
- match->rm_so = hit - line;
- match->rm_eo = match->rm_so + strlen(pattern);
+ } else {
+ match->rm_so = offset;
+ match->rm_eo = match->rm_so + kwsm.size[0];
return 0;
}
}
@@ -346,7 +367,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
again:
if (p->fixed)
- hit = !fixmatch(p->pattern, bol, p->ignore_case, pmatch);
+ hit = !fixmatch(p->kws, bol, eol-bol, pmatch);
else
hit = !regexec(&p->regexp, bol, 1, pmatch, eflags);
@@ -670,9 +691,9 @@ static int look_ahead(struct grep_opt *opt,
int hit;
regmatch_t m;
- if (p->fixed)
- hit = !fixmatch(p->pattern, bol, p->ignore_case, &m);
- else {
+ if (p->fixed) {
+ hit = !fixmatch(p->kws, bol, *left_p, &m);
+ } else {
#ifdef REG_STARTEND
m.rm_so = 0;
m.rm_eo = *left_p;
diff --git a/grep.h b/grep.h
index 9703087..3c79154 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,7 @@
#ifndef GREP_H
#define GREP_H
#include "color.h"
+#include "kwset.h"
enum grep_pat_token {
GREP_PATTERN,
@@ -31,6 +32,7 @@ struct grep_pat {
const char *pattern;
enum grep_header_field field;
regex_t regexp;
+ kwset_t kws;
unsigned fixed:1;
unsigned ignore_case:1;
unsigned word_regexp:1;
next prev parent reply other threads:[~2010-02-13 14:21 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20100213141558.22851.13660.stgit@fredrik-laptop>
2010-02-13 14:20 ` [PATCH 1/5] Add obstack.[ch] from EGLIBC 2.10 Fredrik Kuivinen
2010-02-13 14:20 ` [PATCH 2/5] Add string search routines from GNU grep Fredrik Kuivinen
2010-02-13 15:49 ` Dmitry Potapov
2010-02-13 15:56 ` Paolo Bonzini
2010-02-14 16:52 ` Fredrik Kuivinen
2010-02-13 14:20 ` [PATCH 3/5] Adapt the kwset code to Git Fredrik Kuivinen
2010-02-13 14:21 ` [PATCH 4/5] Use kwset in pickaxe Fredrik Kuivinen
2010-02-13 14:21 ` Fredrik Kuivinen [this message]
2010-02-13 15:58 ` [PATCH 5/5] Use kwset in grep Paolo Bonzini
2010-02-13 17:38 ` Paolo Bonzini
2010-02-14 16:51 ` Fredrik Kuivinen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100213142110.GF9543@fredrik-laptop \
--to=frekui@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).