git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Michał Kiedrowicz" <michal.kiedrowicz@gmail.com>
To: Git List <git@vger.kernel.org>
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Martin Langhoff" <martin.langhoff@gmail.com>,
	"Bert Wesarg" <bert.wesarg@googlemail.com>,
	"Johannes Sixt" <j.sixt@viscovery.net>,
	"Alex Riesen" <raa.lkml@gmail.com>,
	"Michał Kiedrowicz" <michal.kiedrowicz@gmail.com>
Subject: [PATCH v3 3/6] git-grep: Learn PCRE
Date: Mon,  9 May 2011 23:52:05 +0200	[thread overview]
Message-ID: <1304977928-7142-4-git-send-email-michal.kiedrowicz@gmail.com> (raw)
In-Reply-To: <1304977928-7142-1-git-send-email-michal.kiedrowicz@gmail.com>

This patch teaches git-grep the --perl-regexp/-P options (naming
borrowed from GNU grep) in order to allow specifying PCRE regexes on the
command line.

PCRE has a number of features which make them more handy to use than
POSIX regexes, like consistent escaping rules, extended character
classes, ungreedy matching etc.

git isn't build with PCRE support automatically. USE_LIBPCRE environment
variable must be enabled (like `make USE_LIBPCRE=YesPlease`).

Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
---
 Documentation/git-grep.txt             |    6 +++
 Makefile                               |   15 ++++++
 builtin/grep.c                         |    2 +
 contrib/completion/git-completion.bash |    1 +
 grep.c                                 |   75 +++++++++++++++++++++++++++++++-
 grep.h                                 |    9 ++++
 6 files changed, 107 insertions(+), 1 deletions(-)

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 4a58378..e150c77 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -12,6 +12,7 @@ SYNOPSIS
 'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
 	   [-v | --invert-match] [-h|-H] [--full-name]
 	   [-E | --extended-regexp] [-G | --basic-regexp]
+	   [-P | --perl-regexp]
 	   [-F | --fixed-strings] [-n | --line-number]
 	   [-l | --files-with-matches] [-L | --files-without-match]
 	   [(-O | --open-files-in-pager) [<pager>]]
@@ -97,6 +98,11 @@ OPTIONS
 	Use POSIX extended/basic regexp for patterns.  Default
 	is to use basic regexp.
 
+-P::
+--perl-regexp::
+	Use Perl-compatible regexp for patterns. Requires libpcre to be
+	compiled in.
+
 -F::
 --fixed-strings::
 	Use fixed strings for patterns (don't interpret pattern
diff --git a/Makefile b/Makefile
index 3a1fe20..717a47c 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,12 @@ all::
 # Define NO_OPENSSL environment variable if you do not have OpenSSL.
 # This also implies BLK_SHA1.
 #
+# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
+# able to use Perl-compatible regular expressions.
+#
+# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
+# /foo/bar/include and /foo/bar/lib directories.
+#
 # Define NO_CURL if you do not have libcurl installed.  git-http-pull and
 # git-http-push are not built, and you cannot use http:// and https://
 # transports.
@@ -1251,6 +1257,15 @@ ifdef NO_LIBGEN_H
 	COMPAT_OBJS += compat/basename.o
 endif
 
+ifdef USE_LIBPCRE
+	BASIC_CFLAGS += -DUSE_LIBPCRE
+	ifdef LIBPCREDIR
+		BASIC_CFLAGS += -I$(LIBPCREDIR)/include
+		EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
+	endif
+	EXTLIBS += -lpcre
+endif
+
 ifdef NO_CURL
 	BASIC_CFLAGS += -DNO_CURL
 	REMOTE_CURL_PRIMARY =
diff --git a/builtin/grep.c b/builtin/grep.c
index 10a1f65..6831975 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -781,6 +781,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 			REG_EXTENDED),
 		OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
 			"interpret patterns as fixed strings"),
+		OPT_BOOLEAN('P', "perl-regexp", &opt.pcre,
+				"use Perl-compatible regular expressions"),
 		OPT_GROUP(""),
 		OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
 		OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash
index 4b2654d..95790a1 100755
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -1487,6 +1487,7 @@ _git_grep ()
 			--text --ignore-case --word-regexp --invert-match
 			--full-name --line-number
 			--extended-regexp --basic-regexp --fixed-strings
+			--perl-regexp
 			--files-with-matches --name-only
 			--files-without-match
 			--max-depth
diff --git a/grep.c b/grep.c
index 870d10c..d03d9e2 100644
--- a/grep.c
+++ b/grep.c
@@ -74,6 +74,69 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p,
 	die("%s'%s': %s", where, p->pattern, error);
 }
 
+#ifdef USE_LIBPCRE
+static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
+{
+	const char *error;
+	int erroffset;
+	int options = 0;
+
+	if (opt->ignore_case)
+		options |= PCRE_CASELESS;
+
+	p->pcre_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
+			NULL);
+	if (!p->pcre_regexp)
+		compile_regexp_failed(p, error);
+
+	p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &error);
+	if (!p->pcre_extra_info && error)
+		die("%s", error);
+}
+
+static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
+		regmatch_t *match, int eflags)
+{
+	int ovector[30], ret, flags = 0;
+
+	if (eflags & REG_NOTBOL)
+		flags |= PCRE_NOTBOL;
+
+	ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
+			0, flags, ovector, ARRAY_SIZE(ovector));
+	if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
+		die("pcre_exec failed with error code %d", ret);
+	if (ret > 0) {
+		ret = 0;
+		match->rm_so = ovector[0];
+		match->rm_eo = ovector[1];
+	}
+
+	return ret;
+}
+
+static void free_pcre_regexp(struct grep_pat *p)
+{
+	pcre_free(p->pcre_regexp);
+	pcre_free(p->pcre_extra_info);
+}
+#else /* !USE_LIBPCRE */
+static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
+{
+	die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
+}
+
+static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
+		regmatch_t *match, int eflags)
+{
+	return 1;
+}
+
+static void free_pcre_regexp(struct grep_pat *p)
+{
+}
+#endif /* !USE_LIBPCRE */
+
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
 	int err;
@@ -85,6 +148,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 	if (p->fixed)
 		return;
 
+	if (opt->pcre) {
+		compile_pcre_regexp(p, opt);
+		return;
+	}
+
 	err = regcomp(&p->regexp, p->pattern, opt->regflags);
 	if (err) {
 		char errbuf[1024];
@@ -327,7 +395,10 @@ void free_grep_patterns(struct grep_opt *opt)
 		case GREP_PATTERN: /* atom */
 		case GREP_PATTERN_HEAD:
 		case GREP_PATTERN_BODY:
-			regfree(&p->regexp);
+			if (p->pcre_regexp)
+				free_pcre_regexp(p);
+			else
+				regfree(&p->regexp);
 			break;
 		default:
 			break;
@@ -426,6 +497,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
 
 	if (p->fixed)
 		hit = !fixmatch(p, line, eol, match);
+	else if (p->pcre_regexp)
+		hit = !pcrematch(p, line, eol, match, eflags);
 	else
 		hit = !regmatch(&p->regexp, line, eol, match, eflags);
 
diff --git a/grep.h b/grep.h
index 06621fe..cd055cd 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,12 @@
 #ifndef GREP_H
 #define GREP_H
 #include "color.h"
+#ifdef USE_LIBPCRE
+#include <pcre.h>
+#else
+typedef int pcre;
+typedef int pcre_extra;
+#endif
 
 enum grep_pat_token {
 	GREP_PATTERN,
@@ -33,6 +39,8 @@ struct grep_pat {
 	size_t patternlen;
 	enum grep_header_field field;
 	regex_t regexp;
+	pcre *pcre_regexp;
+	pcre_extra *pcre_extra_info;
 	unsigned fixed:1;
 	unsigned ignore_case:1;
 	unsigned word_regexp:1;
@@ -83,6 +91,7 @@ struct grep_opt {
 #define GREP_BINARY_TEXT	2
 	int binary;
 	int extended;
+	int pcre;
 	int relative;
 	int pathname;
 	int null_following_name;
-- 
1.7.3.4

  parent reply	other threads:[~2011-05-09 21:53 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-05-09 21:52 [PATCH v3 0/6] Add PCRE support to git-grep Michał Kiedrowicz
2011-05-09 21:52 ` [PATCH v3 1/6] grep: Fix a typo in a comment Michał Kiedrowicz
2011-05-09 21:52 ` [PATCH v3 2/6] grep: Extract compile_regexp_failed() from compile_regexp() Michał Kiedrowicz
2011-05-09 21:52 ` Michał Kiedrowicz [this message]
2011-05-09 21:52 ` [PATCH v3 4/6] configure: Check for libpcre Michał Kiedrowicz
2011-05-09 21:52 ` [PATCH v3 5/6] grep: Add basic tests Michał Kiedrowicz
2011-05-09 21:52 ` [PATCH v3 6/6] git-grep: Bail out when -P is used with -F or -E Michał Kiedrowicz
2011-05-10  1:48   ` Junio C Hamano
2011-05-10  5:24     ` Michal Kiedrowicz
2011-05-10  6:11       ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1304977928-7142-4-git-send-email-michal.kiedrowicz@gmail.com \
    --to=michal.kiedrowicz@gmail.com \
    --cc=bert.wesarg@googlemail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=j.sixt@viscovery.net \
    --cc=martin.langhoff@gmail.com \
    --cc=raa.lkml@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).