From: "Michał Kiedrowicz" <michal.kiedrowicz@gmail.com>
To: Git List <git@vger.kernel.org>
Cc: "Michał Kiedrowicz" <michal.kiedrowicz@gmail.com>
Subject: [RFC PATCH 3/3] git-grep: Learn PCRE
Date: Tue, 3 May 2011 23:35:57 +0200 [thread overview]
Message-ID: <1304458557-25629-3-git-send-email-michal.kiedrowicz@gmail.com> (raw)
In-Reply-To: <1304458557-25629-1-git-send-email-michal.kiedrowicz@gmail.com>
This is quite minimal (I hope) patch that teaches git-grep the
--perl-regexp/-P options in order to allow specifying PCRE regexes on
the command line.
PCRE has a number of features which make them more handy to use than
POSIX regexes, like consistent escaping rules, extended character
classes, ungreedy matching etc.
Here are some implementation notes:
Names of command line options are borrowed from GNU grep.
Makefile changes are based on curl handing.
The configure script wasn't updated.
This patch lacks tests.
AFAIK grep.c is a part of libgit.a, so libpcre is linked with the git
itself, which bloats it. I don't like it, especially because it makes
'make test' take:
real 6m27.558s
user 1m34.392s
sys 2m11.029s
instead of
real 3m41.322s
user 1m44.005s
sys 2m32.403s
on my PC.
---
Documentation/git-grep.txt | 6 ++++
Makefile | 16 ++++++++++++
builtin/grep.c | 2 +
grep.c | 57 ++++++++++++++++++++++++++++++++++++++++++++
grep.h | 8 ++++++
5 files changed, 89 insertions(+), 0 deletions(-)
diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 4a58378..e150c77 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -12,6 +12,7 @@ SYNOPSIS
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
[-v | --invert-match] [-h|-H] [--full-name]
[-E | --extended-regexp] [-G | --basic-regexp]
+ [-P | --perl-regexp]
[-F | --fixed-strings] [-n | --line-number]
[-l | --files-with-matches] [-L | --files-without-match]
[(-O | --open-files-in-pager) [<pager>]]
@@ -97,6 +98,11 @@ OPTIONS
Use POSIX extended/basic regexp for patterns. Default
is to use basic regexp.
+-P::
+--perl-regexp::
+ Use Perl-compatible regexp for patterns. Requires libpcre to be
+ compiled in.
+
-F::
--fixed-strings::
Use fixed strings for patterns (don't interpret pattern
diff --git a/Makefile b/Makefile
index 3a1fe20..98841dc 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,12 @@ all::
# Define NO_OPENSSL environment variable if you do not have OpenSSL.
# This also implies BLK_SHA1.
#
+# Define NO_LIBPCRE if you do not have libpcre installed. git-grep cannot use
+# Perl-compatible regexes.
+#
+# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
@@ -1251,6 +1257,16 @@ ifdef NO_LIBGEN_H
COMPAT_OBJS += compat/basename.o
endif
+ifdef NO_LIBPCRE
+ BASIC_CFLAGS += -DNO_LIBPCRE
+else
+ ifdef LIBPCREDIR
+ BASIC_CFLAGS += -I$(LIBPCREDIR)/include
+ EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
+ endif
+ EXTLIBS += -lpcre
+endif
+
ifdef NO_CURL
BASIC_CFLAGS += -DNO_CURL
REMOTE_CURL_PRIMARY =
diff --git a/builtin/grep.c b/builtin/grep.c
index 10a1f65..6831975 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -781,6 +781,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
REG_EXTENDED),
OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
"interpret patterns as fixed strings"),
+ OPT_BOOLEAN('P', "perl-regexp", &opt.pcre,
+ "use Perl-compatible regular expressions"),
OPT_GROUP(""),
OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
diff --git a/grep.c b/grep.c
index d67baf9..4f5fcbb 100644
--- a/grep.c
+++ b/grep.c
@@ -70,6 +70,30 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
if (p->fixed)
return;
+ if (opt->pcre) {
+#ifdef NO_LIBPCRE
+ die("cannot use Perl-compatible regexes when libpcre is not compiled in");
+#else
+ const char *error;
+ int erroffset;
+ int options = 0;
+
+ if (opt->ignore_case)
+ options |= PCRE_CASELESS;
+
+ p->re = pcre_compile(p->pattern, options, &error,
+ &erroffset, NULL);
+ if (!p->re)
+ die("'%s': %s", p->pattern, error);
+
+ p->extra = pcre_study(p->re, 0, &error);
+ if (!p->extra && error)
+ die("%s", error);
+
+ return;
+#endif
+ }
+
err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) {
char errbuf[1024];
@@ -320,7 +344,16 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
+#ifndef NO_LIBPCRE
+ if (p->re) {
+ pcre_free(p->re);
+ pcre_free(p->extra);
+ } else {
+ regfree(&p->regexp);
+ }
+#else
regfree(&p->regexp);
+#endif
break;
default:
break;
@@ -401,6 +434,26 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol,
}
}
+#ifndef NO_LIBPCRE
+static int pcrematch(const pcre *re, const pcre_extra *extra, char *line,
+ char *eol, regmatch_t *match)
+{
+ int ovector[30], ret;
+
+ ret = pcre_exec(re, NULL, line, eol - line, 0, 0, ovector,
+ ARRAY_SIZE(ovector));
+ if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
+ die("pcre_exec failed with error code %d", ret);
+ if (ret > 0) {
+ ret = 0;
+ match->rm_so = ovector[0];
+ match->rm_eo = ovector[1];
+ }
+
+ return ret;
+}
+#endif /* NO_LIBPCRE */
+
static int regmatch(const regex_t *preg, char *line, char *eol,
regmatch_t *match, int eflags)
{
@@ -419,6 +472,10 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
if (p->fixed)
hit = !fixmatch(p, line, eol, match);
+#ifndef NO_LIBPCRE
+ else if (p->re)
+ hit = !pcrematch(p->re, p->extra, line, eol, match);
+#endif /* NO_LIBPCRE */
else
hit = !regmatch(&p->regexp, line, eol, match, eflags);
diff --git a/grep.h b/grep.h
index 06621fe..cd202a9 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,9 @@
#ifndef GREP_H
#define GREP_H
#include "color.h"
+#ifndef NO_LIBPCRE
+#include <pcre.h>
+#endif /* NO_LIBPCRE */
enum grep_pat_token {
GREP_PATTERN,
@@ -33,6 +36,10 @@ struct grep_pat {
size_t patternlen;
enum grep_header_field field;
regex_t regexp;
+#ifndef NO_LIBPCRE
+ pcre *re;
+ pcre_extra *extra;
+#endif /* NO_LIBPCRE */
unsigned fixed:1;
unsigned ignore_case:1;
unsigned word_regexp:1;
@@ -83,6 +90,7 @@ struct grep_opt {
#define GREP_BINARY_TEXT 2
int binary;
int extended;
+ int pcre;
int relative;
int pathname;
int null_following_name;
--
1.7.3.4
next prev parent reply other threads:[~2011-05-03 21:36 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-03 21:35 [PATCH 1/3] grep: Put calls to fixmatch() and regmatch() into patmatch() Michał Kiedrowicz
2011-05-03 21:35 ` [PATCH 2/3] Documentation: Add --line-number to git-grep synopsis Michał Kiedrowicz
2011-05-03 21:35 ` Michał Kiedrowicz [this message]
2011-05-03 23:11 ` [RFC PATCH 3/3] git-grep: Learn PCRE Junio C Hamano
2011-05-04 6:16 ` Michal Kiedrowicz
2011-05-03 23:15 ` Martin Langhoff
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1304458557-25629-3-git-send-email-michal.kiedrowicz@gmail.com \
--to=michal.kiedrowicz@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).