From: "Michał Kiedrowicz" <michal.kiedrowicz@gmail.com>
To: Git List <git@vger.kernel.org>
Cc: "Junio C Hamano" <gitster@pobox.com>,
"Martin Langhoff" <martin.langhoff@gmail.com>,
"Michał Kiedrowicz" <michal.kiedrowicz@gmail.com>
Subject: [PATCH V2 4/5] git-grep: Learn PCRE
Date: Thu, 5 May 2011 00:00:20 +0200 [thread overview]
Message-ID: <1304546421-25439-5-git-send-email-michal.kiedrowicz@gmail.com> (raw)
In-Reply-To: <1304546421-25439-1-git-send-email-michal.kiedrowicz@gmail.com>
This patch teaches git-grep the --perl-regexp/-P options (naming
borrowed from GNU grep) in order to allow specifying PCRE regexes on the
command line.
PCRE regexes have a number of features which make them handier to use
than POSIX regexes, such as consistent escaping rules, extended character
classes, and ungreedy matching.
Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
---
Documentation/git-grep.txt | 6 +++
Makefile | 16 +++++++
builtin/grep.c | 2 +
contrib/completion/git-completion.bash | 1 +
grep.c | 77 +++++++++++++++++++++++++++++++-
grep.h | 9 ++++
6 files changed, 110 insertions(+), 1 deletions(-)
diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 4a58378..e150c77 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -12,6 +12,7 @@ SYNOPSIS
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
[-v | --invert-match] [-h|-H] [--full-name]
[-E | --extended-regexp] [-G | --basic-regexp]
+ [-P | --perl-regexp]
[-F | --fixed-strings] [-n | --line-number]
[-l | --files-with-matches] [-L | --files-without-match]
[(-O | --open-files-in-pager) [<pager>]]
@@ -97,6 +98,11 @@ OPTIONS
Use POSIX extended/basic regexp for patterns. Default
is to use basic regexp.
+-P::
+--perl-regexp::
+ Use Perl-compatible regexp for patterns. Requires libpcre to be
+ compiled in.
+
-F::
--fixed-strings::
Use fixed strings for patterns (don't interpret pattern
diff --git a/Makefile b/Makefile
index 3a1fe20..98841dc 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,12 @@ all::
# Define NO_OPENSSL environment variable if you do not have OpenSSL.
# This also implies BLK_SHA1.
#
+# Define NO_LIBPCRE if you do not have libpcre installed. git-grep then cannot
+# use Perl-compatible regexes.
+#
+# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
@@ -1251,6 +1257,16 @@ ifdef NO_LIBGEN_H
COMPAT_OBJS += compat/basename.o
endif
+ifdef NO_LIBPCRE
+ BASIC_CFLAGS += -DNO_LIBPCRE
+else
+ ifdef LIBPCREDIR
+ BASIC_CFLAGS += -I$(LIBPCREDIR)/include
+ EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
+ endif
+ EXTLIBS += -lpcre
+endif
+
ifdef NO_CURL
BASIC_CFLAGS += -DNO_CURL
REMOTE_CURL_PRIMARY =
diff --git a/builtin/grep.c b/builtin/grep.c
index 10a1f65..6831975 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -781,6 +781,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
REG_EXTENDED),
OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
"interpret patterns as fixed strings"),
+ OPT_BOOLEAN('P', "perl-regexp", &opt.pcre,
+ "use Perl-compatible regular expressions"),
OPT_GROUP(""),
OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 4b2654d..95790a1 100755
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1487,6 +1487,7 @@ _git_grep ()
--text --ignore-case --word-regexp --invert-match
--full-name --line-number
--extended-regexp --basic-regexp --fixed-strings
+ --perl-regexp
--files-with-matches --name-only
--files-without-match
--max-depth
diff --git a/grep.c b/grep.c
index d67baf9..561b791 100644
--- a/grep.c
+++ b/grep.c
@@ -3,6 +3,71 @@
#include "userdiff.h"
#include "xdiff-interface.h"
+#ifdef NO_LIBPCRE
+static void compile_pcre_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+ die("cannot use Perl-compatible regexes when libpcre is not compiled in");
+}
+
+static int pcrematch(struct grep_pat *p, char *line, char *eol,
+ regmatch_t *match, int eflags)
+{
+ die("cannot use Perl-compatible regexes when libpcre is not compiled in");
+}
+
+static void free_pcre_regexp(struct grep_pat *p)
+{
+ die("cannot use Perl-compatible regexes when libpcre is not compiled in");
+}
+
+#else /* !NO_LIBPCRE */
+static void compile_pcre_regexp(struct grep_pat *p, struct grep_opt *opt)
+{
+ const char *error;
+ int erroffset;
+ int options = 0;
+
+ if (opt->ignore_case)
+ options |= PCRE_CASELESS;
+
+ p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
+ NULL);
+ if (!p->pcre_regexp)
+ die("'%s': %s", p->pattern, error);
+
+ p->extra = pcre_study(p->pcre_regexp, 0, &error);
+ if (!p->extra && error)
+ die("%s", error);
+}
+
+static int pcrematch(struct grep_pat *p, char *line, char *eol,
+ regmatch_t *match, int eflags)
+{
+ int ovector[30], ret, flags = 0;
+
+ if (eflags & REG_NOTBOL)
+ flags |= PCRE_NOTBOL;
+
+ ret = pcre_exec(p->pcre_regexp, p->extra, line, eol - line, 0, flags,
+ ovector, ARRAY_SIZE(ovector));
+ if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
+ die("pcre_exec failed with error code %d", ret);
+ if (ret > 0) {
+ ret = 0;
+ match->rm_so = ovector[0];
+ match->rm_eo = ovector[1];
+ }
+
+ return ret;
+}
+
+static void free_pcre_regexp(struct grep_pat *p)
+{
+ pcre_free(p->pcre_regexp);
+ pcre_free(p->extra);
+}
+#endif /* !NO_LIBPCRE */
+
void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
{
struct grep_pat *p = xcalloc(1, sizeof(*p));
@@ -70,6 +135,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
if (p->fixed)
return;
+ if (opt->pcre) {
+ compile_pcre_regexp(p, opt);
+ return;
+ }
+
err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) {
char errbuf[1024];
@@ -320,7 +390,10 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
- regfree(&p->regexp);
+ if (p->pcre_regexp)
+ free_pcre_regexp(p);
+ else
+ regfree(&p->regexp);
break;
default:
break;
@@ -419,6 +492,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
if (p->fixed)
hit = !fixmatch(p, line, eol, match);
+ else if (p->pcre_regexp)
+ hit = !pcrematch(p, line, eol, match, eflags);
else
hit = !regmatch(&p->regexp, line, eol, match, eflags);
diff --git a/grep.h b/grep.h
index 06621fe..68aa47a 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,12 @@
#ifndef GREP_H
#define GREP_H
#include "color.h"
+#ifndef NO_LIBPCRE
+#include <pcre.h>
+#else
+typedef int pcre;
+typedef int pcre_extra;
+#endif /* NO_LIBPCRE */
enum grep_pat_token {
GREP_PATTERN,
@@ -33,6 +39,8 @@ struct grep_pat {
size_t patternlen;
enum grep_header_field field;
regex_t regexp;
+ pcre *pcre_regexp;
+ pcre_extra *extra;
unsigned fixed:1;
unsigned ignore_case:1;
unsigned word_regexp:1;
@@ -83,6 +91,7 @@ struct grep_opt {
#define GREP_BINARY_TEXT 2
int binary;
int extended;
+ int pcre;
int relative;
int pathname;
int null_following_name;
--
1.7.3.4
next prev parent reply other threads:[~2011-05-04 22:01 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-05-04 22:00 [PATCH V2 0/5] Add PCRE support to git-grep Michał Kiedrowicz
2011-05-04 22:00 ` [PATCH V2 1/5] Documentation: Add --line-number to git-grep synopsis Michał Kiedrowicz
2011-05-04 22:00 ` [PATCH V2 2/5] contrib/completion: --line-number to git grep Michał Kiedrowicz
2011-05-04 22:00 ` [PATCH V2 3/5] grep: Put calls to fixmatch() and regmatch() into patmatch() Michał Kiedrowicz
2011-05-04 22:00 ` Michał Kiedrowicz [this message]
2011-05-05 1:09 ` [PATCH V2 4/5] git-grep: Learn PCRE Junio C Hamano
2011-05-05 5:47 ` Bert Wesarg
2011-05-05 16:55 ` Junio C Hamano
2011-05-05 6:19 ` Johannes Sixt
2011-05-05 7:41 ` Michal Kiedrowicz
2011-05-05 7:49 ` Johannes Sixt
2011-05-05 8:38 ` Michal Kiedrowicz
2011-05-05 6:28 ` Johannes Sixt
2011-05-05 7:43 ` Michal Kiedrowicz
2011-05-05 7:43 ` Alex Riesen
2011-05-04 22:00 ` [PATCH V2 5/5] configure: Check for libpcre Michał Kiedrowicz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1304546421-25439-5-git-send-email-michal.kiedrowicz@gmail.com \
--to=michal.kiedrowicz@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=martin.langhoff@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).