git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org
Subject: [PATCH 8/8] grep: support NUL chars in search strings for -F
Date: Sat, 22 May 2010 23:43:43 +0200	[thread overview]
Message-ID: <4BF8500F.70205@lsrfire.ath.cx> (raw)
In-Reply-To: <4BF84B9E.7060009@lsrfire.ath.cx>

Search patterns in a file specified with -f can contain NUL characters.
The current code ignores all characters on a line after a NUL.

Pass the actual length of the line all the way from the pattern file to
fixmatch() and use it for case-sensitive fixed string matching.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
Support for -F was easy, but in order to be able to search for NULs
with -Fi, -G and -E, we'd need a different case-insensitive fixed
string search function (memcasemem?) and either a different regex
library or at least a different (non-POSIX) entry point.

How badly do we need this feature?  If a new regex lib were faster or
improved multi-platform support, then NUL support would be a nice side
effect, I think, but this feature alone doesn't justify a switch in my
eyes.


 builtin/grep.c         |    8 ++++++--
 grep.c                 |   33 ++++++++++++++++++++-------------
 grep.h                 |    2 ++
 t/t7008-grep-binary.sh |   30 ++++++++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index b194ea3..d0a73da 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -724,11 +724,15 @@ static int file_callback(const struct option *opt, const char *arg, int unset)
 	if (!patterns)
 		die_errno("cannot open '%s'", arg);
 	while (strbuf_getline(&sb, patterns, '\n') == 0) {
+		char *s;
+		size_t len;
+
 		/* ignore empty line like grep does */
 		if (sb.len == 0)
 			continue;
-		append_grep_pattern(grep_opt, strbuf_detach(&sb, NULL), arg,
-				    ++lno, GREP_PATTERN);
+
+		s = strbuf_detach(&sb, &len);
+		append_grep_pat(grep_opt, s, len, arg, ++lno, GREP_PATTERN);
 	}
 	fclose(patterns);
 	strbuf_release(&sb);
diff --git a/grep.c b/grep.c
index 70a776f..82fb349 100644
--- a/grep.c
+++ b/grep.c
@@ -7,6 +7,7 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
 {
 	struct grep_pat *p = xcalloc(1, sizeof(*p));
 	p->pattern = pat;
+	p->patternlen = strlen(pat);
 	p->origin = "header";
 	p->no = 0;
 	p->token = GREP_PATTERN_HEAD;
@@ -19,8 +20,15 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
 void append_grep_pattern(struct grep_opt *opt, const char *pat,
 			 const char *origin, int no, enum grep_pat_token t)
 {
+	append_grep_pat(opt, pat, strlen(pat), origin, no, t);
+}
+
+void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
+		     const char *origin, int no, enum grep_pat_token t)
+{
 	struct grep_pat *p = xcalloc(1, sizeof(*p));
 	p->pattern = pat;
+	p->patternlen = patlen;
 	p->origin = origin;
 	p->no = no;
 	p->token = t;
@@ -44,8 +52,8 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
 			append_header_grep_pattern(ret, pat->field,
 						   pat->pattern);
 		else
-			append_grep_pattern(ret, pat->pattern, pat->origin,
-					    pat->no, pat->token);
+			append_grep_pat(ret, pat->pattern, pat->patternlen,
+					pat->origin, pat->no, pat->token);
 	}
 
 	return ret;
@@ -329,21 +337,21 @@ static void show_name(struct grep_opt *opt, const char *name)
 	opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
 }
 
-static int fixmatch(const char *pattern, char *line, char *eol,
-		    int ignore_case, regmatch_t *match)
+static int fixmatch(struct grep_pat *p, char *line, char *eol,
+		    regmatch_t *match)
 {
 	char *hit;
 
-	if (ignore_case) {
+	if (p->ignore_case) {
 		char *s = line;
 		do {
-			hit = strcasestr(s, pattern);
+			hit = strcasestr(s, p->pattern);
 			if (hit)
 				break;
 			s += strlen(s) + 1;
 		} while (s < eol);
 	} else
-		hit = memmem(line, eol - line, pattern, strlen(pattern));
+		hit = memmem(line, eol - line, p->pattern, p->patternlen);
 
 	if (!hit) {
 		match->rm_so = match->rm_eo = -1;
@@ -351,7 +359,7 @@ static int fixmatch(const char *pattern, char *line, char *eol,
 	}
 	else {
 		match->rm_so = hit - line;
-		match->rm_eo = match->rm_so + strlen(pattern);
+		match->rm_eo = match->rm_so + p->patternlen;
 		return 0;
 	}
 }
@@ -417,7 +425,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
 
  again:
 	if (p->fixed)
-		hit = !fixmatch(p->pattern, bol, eol, p->ignore_case, pmatch);
+		hit = !fixmatch(p, bol, eol, pmatch);
 	else
 		hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
 
@@ -743,10 +751,9 @@ static int look_ahead(struct grep_opt *opt,
 		int hit;
 		regmatch_t m;
 
-		if (p->fixed) {
-			hit = !fixmatch(p->pattern, bol, bol + *left_p,
-					p->ignore_case, &m);
-		} else
+		if (p->fixed)
+			hit = !fixmatch(p, bol, bol + *left_p, &m);
+		else
 			hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
 		if (!hit || m.rm_so < 0 || m.rm_eo < 0)
 			continue;
diff --git a/grep.h b/grep.h
index 89342e5..0aebebd 100644
--- a/grep.h
+++ b/grep.h
@@ -29,6 +29,7 @@ struct grep_pat {
 	int no;
 	enum grep_pat_token token;
 	const char *pattern;
+	size_t patternlen;
 	enum grep_header_field field;
 	regex_t regexp;
 	unsigned fixed:1;
@@ -104,6 +105,7 @@ struct grep_opt {
 	void *output_priv;
 };
 
+extern void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t);
 extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
 extern void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *);
 extern void compile_grep_patterns(struct grep_opt *opt);
diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh
index 4f5e74f..eb8ca88 100755
--- a/t/t7008-grep-binary.sh
+++ b/t/t7008-grep-binary.sh
@@ -69,4 +69,34 @@ test_expect_failure 'git grep .fi a' '
 	git grep .fi a
 '
 
+test_expect_success 'git grep -F y<NUL>f a' "
+	printf 'y\000f' >f &&
+	git grep -f f -F a
+"
+
+test_expect_success 'git grep -F y<NUL>x a' "
+	printf 'y\000x' >f &&
+	test_must_fail git grep -f f -F a
+"
+
+test_expect_success 'git grep -Fi Y<NUL>f a' "
+	printf 'Y\000f' >f &&
+	git grep -f f -Fi a
+"
+
+test_expect_failure 'git grep -Fi Y<NUL>x a' "
+	printf 'Y\000x' >f &&
+	test_must_fail git grep -f f -Fi a
+"
+
+test_expect_success 'git grep y<NUL>f a' "
+	printf 'y\000f' >f &&
+	git grep -f f a
+"
+
+test_expect_failure 'git grep y<NUL>x a' "
+	printf 'y\000x' >f &&
+	test_must_fail git grep -f f a
+"
+
 test_done
-- 
1.7.1

      parent reply	other threads:[~2010-05-22 21:44 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-19 14:33 What's cooking extra Junio C Hamano
2010-05-19 15:12 ` A Large Angry SCM
2010-05-19 17:06 ` Finn Arne Gangstad
2010-05-19 20:09   ` Eyvind Bernhardsen
2010-05-22 13:09   ` Clemens Buchacher
2010-05-22 19:42     ` Eyvind Bernhardsen
2010-05-22 22:27       ` Clemens Buchacher
2010-05-23 10:36         ` Eyvind Bernhardsen
2010-05-23 11:51           ` Clemens Buchacher
2010-05-23 12:53             ` Eyvind Bernhardsen
2010-05-23 13:26               ` Ævar Arnfjörð Bjarmason
2010-05-24  9:49               ` Clemens Buchacher
2010-05-24 12:47                 ` Dmitry Potapov
2010-05-24 20:45                   ` Eyvind Bernhardsen
2010-05-24 20:56                   ` Clemens Buchacher
2010-05-24 21:09                     ` Eyvind Bernhardsen
2010-05-24 21:11                 ` Eyvind Bernhardsen
2010-05-24 22:11                   ` Clemens Buchacher
2010-05-25  6:41                     ` Eyvind Bernhardsen
2010-05-25  8:27                       ` Anthony Youngman
2010-06-07 19:55                         ` Eyvind Bernhardsen
2010-05-25  8:33                       ` Clemens Buchacher
2010-05-24 12:12             ` Dmitry Potapov
2010-05-24 12:22               ` Erik Faye-Lund
2010-05-24 12:42                 ` Dmitry Potapov
2010-05-21 16:16 ` Ævar Arnfjörð Bjarmason
2010-05-22 21:24 ` René Scharfe
2010-05-22 21:26   ` [PATCH 1/8] grep: add test script for binary file handling René Scharfe
2010-05-22 21:28   ` [PATCH 2/8] grep: grep: refactor handling of binary mode options René Scharfe
2010-05-22 21:29   ` [PATCH 3/8] grep: --count over binary René Scharfe
2010-05-22 21:30   ` [PATCH 4/8] grep: --name-only " René Scharfe
2010-05-22 21:32   ` [PATCH 5/8] grep: use memmem() for fixed string search René Scharfe
2010-05-22 21:34   ` [PATCH 6/8] grep: continue case insensitive fixed string search after NUL chars René Scharfe
2010-05-22 21:35   ` [PATCH 7/8] grep: use REG_STARTEND for all matching if available René Scharfe
2010-05-22 21:43   ` René Scharfe [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4BF8500F.70205@lsrfire.ath.cx \
    --to=rene.scharfe@lsrfire.ath.cx \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).