From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org
Subject: [PATCH 8/8] grep: support NUL chars in search strings for -F
Date: Sat, 22 May 2010 23:43:43 +0200 [thread overview]
Message-ID: <4BF8500F.70205@lsrfire.ath.cx> (raw)
In-Reply-To: <4BF84B9E.7060009@lsrfire.ath.cx>
Search patterns in a file specified with -f can contain NUL characters.
The current code ignores all characters on a line after a NUL.
Pass the actual length of the line all the way from the pattern file to
fixmatch() and use it for case-sensitive fixed string matching.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
Support for -F was easy, but in order to be able to search for NULs
with -Fi, -G and -E, we'd need a different case-insensitive fixed
string search function (memcasemem?) and a different regex library, or
would at least have to use a different (non-POSIX) entry point.
How badly do we need this feature? If the new regex lib is faster or
improves multi-platform support then NUL support would be a nice side
effect, I think, but this feature alone doesn't justify a switch in my
eyes.
builtin/grep.c | 8 ++++++--
grep.c | 33 ++++++++++++++++++++-------------
grep.h | 2 ++
t/t7008-grep-binary.sh | 30 ++++++++++++++++++++++++++++++
4 files changed, 58 insertions(+), 15 deletions(-)
diff --git a/builtin/grep.c b/builtin/grep.c
index b194ea3..d0a73da 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -724,11 +724,15 @@ static int file_callback(const struct option *opt, const char *arg, int unset)
if (!patterns)
die_errno("cannot open '%s'", arg);
while (strbuf_getline(&sb, patterns, '\n') == 0) {
+ char *s;
+ size_t len;
+
/* ignore empty line like grep does */
if (sb.len == 0)
continue;
- append_grep_pattern(grep_opt, strbuf_detach(&sb, NULL), arg,
- ++lno, GREP_PATTERN);
+
+ s = strbuf_detach(&sb, &len);
+ append_grep_pat(grep_opt, s, len, arg, ++lno, GREP_PATTERN);
}
fclose(patterns);
strbuf_release(&sb);
diff --git a/grep.c b/grep.c
index 70a776f..82fb349 100644
--- a/grep.c
+++ b/grep.c
@@ -7,6 +7,7 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
{
struct grep_pat *p = xcalloc(1, sizeof(*p));
p->pattern = pat;
+ p->patternlen = strlen(pat);
p->origin = "header";
p->no = 0;
p->token = GREP_PATTERN_HEAD;
@@ -19,8 +20,15 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
void append_grep_pattern(struct grep_opt *opt, const char *pat,
const char *origin, int no, enum grep_pat_token t)
{
+ append_grep_pat(opt, pat, strlen(pat), origin, no, t);
+}
+
+void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
+ const char *origin, int no, enum grep_pat_token t)
+{
struct grep_pat *p = xcalloc(1, sizeof(*p));
p->pattern = pat;
+ p->patternlen = patlen;
p->origin = origin;
p->no = no;
p->token = t;
@@ -44,8 +52,8 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
append_header_grep_pattern(ret, pat->field,
pat->pattern);
else
- append_grep_pattern(ret, pat->pattern, pat->origin,
- pat->no, pat->token);
+ append_grep_pat(ret, pat->pattern, pat->patternlen,
+ pat->origin, pat->no, pat->token);
}
return ret;
@@ -329,21 +337,21 @@ static void show_name(struct grep_opt *opt, const char *name)
opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
}
-static int fixmatch(const char *pattern, char *line, char *eol,
- int ignore_case, regmatch_t *match)
+static int fixmatch(struct grep_pat *p, char *line, char *eol,
+ regmatch_t *match)
{
char *hit;
- if (ignore_case) {
+ if (p->ignore_case) {
char *s = line;
do {
- hit = strcasestr(s, pattern);
+ hit = strcasestr(s, p->pattern);
if (hit)
break;
s += strlen(s) + 1;
} while (s < eol);
} else
- hit = memmem(line, eol - line, pattern, strlen(pattern));
+ hit = memmem(line, eol - line, p->pattern, p->patternlen);
if (!hit) {
match->rm_so = match->rm_eo = -1;
@@ -351,7 +359,7 @@ static int fixmatch(const char *pattern, char *line, char *eol,
}
else {
match->rm_so = hit - line;
- match->rm_eo = match->rm_so + strlen(pattern);
+ match->rm_eo = match->rm_so + p->patternlen;
return 0;
}
}
@@ -417,7 +425,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
again:
if (p->fixed)
- hit = !fixmatch(p->pattern, bol, eol, p->ignore_case, pmatch);
+ hit = !fixmatch(p, bol, eol, pmatch);
else
hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
@@ -743,10 +751,9 @@ static int look_ahead(struct grep_opt *opt,
int hit;
regmatch_t m;
- if (p->fixed) {
- hit = !fixmatch(p->pattern, bol, bol + *left_p,
- p->ignore_case, &m);
- } else
+ if (p->fixed)
+ hit = !fixmatch(p, bol, bol + *left_p, &m);
+ else
hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
continue;
diff --git a/grep.h b/grep.h
index 89342e5..0aebebd 100644
--- a/grep.h
+++ b/grep.h
@@ -29,6 +29,7 @@ struct grep_pat {
int no;
enum grep_pat_token token;
const char *pattern;
+ size_t patternlen;
enum grep_header_field field;
regex_t regexp;
unsigned fixed:1;
@@ -104,6 +105,7 @@ struct grep_opt {
void *output_priv;
};
+extern void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t);
extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
extern void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *);
extern void compile_grep_patterns(struct grep_opt *opt);
diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh
index 4f5e74f..eb8ca88 100755
--- a/t/t7008-grep-binary.sh
+++ b/t/t7008-grep-binary.sh
@@ -69,4 +69,34 @@ test_expect_failure 'git grep .fi a' '
git grep .fi a
'
+test_expect_success 'git grep -F y<NUL>f a' "
+ printf 'y\000f' >f &&
+ git grep -f f -F a
+"
+
+test_expect_success 'git grep -F y<NUL>x a' "
+ printf 'y\000x' >f &&
+ test_must_fail git grep -f f -F a
+"
+
+test_expect_success 'git grep -Fi Y<NUL>f a' "
+ printf 'Y\000f' >f &&
+ git grep -f f -Fi a
+"
+
+test_expect_failure 'git grep -Fi Y<NUL>x a' "
+ printf 'Y\000x' >f &&
+ test_must_fail git grep -f f -Fi a
+"
+
+test_expect_success 'git grep y<NUL>f a' "
+ printf 'y\000f' >f &&
+ git grep -f f a
+"
+
+test_expect_failure 'git grep y<NUL>x a' "
+ printf 'y\000x' >f &&
+ test_must_fail git grep -f f a
+"
+
test_done
--
1.7.1
prev parent reply other threads:[~2010-05-22 21:44 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-19 14:33 What's cooking extra Junio C Hamano
2010-05-19 15:12 ` A Large Angry SCM
2010-05-19 17:06 ` Finn Arne Gangstad
2010-05-19 20:09 ` Eyvind Bernhardsen
2010-05-22 13:09 ` Clemens Buchacher
2010-05-22 19:42 ` Eyvind Bernhardsen
2010-05-22 22:27 ` Clemens Buchacher
2010-05-23 10:36 ` Eyvind Bernhardsen
2010-05-23 11:51 ` Clemens Buchacher
2010-05-23 12:53 ` Eyvind Bernhardsen
2010-05-23 13:26 ` Ævar Arnfjörð Bjarmason
2010-05-24 9:49 ` Clemens Buchacher
2010-05-24 12:47 ` Dmitry Potapov
2010-05-24 20:45 ` Eyvind Bernhardsen
2010-05-24 20:56 ` Clemens Buchacher
2010-05-24 21:09 ` Eyvind Bernhardsen
2010-05-24 21:11 ` Eyvind Bernhardsen
2010-05-24 22:11 ` Clemens Buchacher
2010-05-25 6:41 ` Eyvind Bernhardsen
2010-05-25 8:27 ` Anthony Youngman
2010-06-07 19:55 ` Eyvind Bernhardsen
2010-05-25 8:33 ` Clemens Buchacher
2010-05-24 12:12 ` Dmitry Potapov
2010-05-24 12:22 ` Erik Faye-Lund
2010-05-24 12:42 ` Dmitry Potapov
2010-05-21 16:16 ` Ævar Arnfjörð Bjarmason
2010-05-22 21:24 ` René Scharfe
2010-05-22 21:26 ` [PATCH 1/8] grep: add test script for binary file handling René Scharfe
2010-05-22 21:28 ` [PATCH 2/8] grep: grep: refactor handling of binary mode options René Scharfe
2010-05-22 21:29 ` [PATCH 3/8] grep: --count over binary René Scharfe
2010-05-22 21:30 ` [PATCH 4/8] grep: --name-only " René Scharfe
2010-05-22 21:32 ` [PATCH 5/8] grep: use memmem() for fixed string search René Scharfe
2010-05-22 21:34 ` [PATCH 6/8] grep: continue case insensitive fixed string search after NUL chars René Scharfe
2010-05-22 21:35 ` [PATCH 7/8] grep: use REG_STARTEND for all matching if available René Scharfe
2010-05-22 21:43 ` René Scharfe [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4BF8500F.70205@lsrfire.ath.cx \
--to=rene.scharfe@lsrfire.ath.cx \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).