From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org
Subject: [PATCH 8/8] grep: support NUL chars in search strings for -F
Date: Sat, 22 May 2010 23:43:43 +0200 [thread overview]
Message-ID: <4BF8500F.70205@lsrfire.ath.cx> (raw)
In-Reply-To: <4BF84B9E.7060009@lsrfire.ath.cx>
Search patterns in a file specified with -f can contain NUL characters.
The current code ignores all characters on a line after a NUL.
Pass the actual length of the line all the way from the pattern file to
fixmatch() and use it for case-sensitive fixed string matching.
Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
Support for -F was easy, but in order to be able to search for NULs
with -Fi, -G and -E, we'd need a different case-insensitive fixed
string search function (memcasemem?) and a different regex library, or
at least a different (non-POSIX) entry point.
How badly do we need this feature? If the new regex lib is faster or
improves multi-platform support then NUL support would be a nice side
effect, I think, but this feature alone doesn't justify a switch in my
eyes.
builtin/grep.c | 8 ++++++--
grep.c | 33 ++++++++++++++++++++-------------
grep.h | 2 ++
t/t7008-grep-binary.sh | 30 ++++++++++++++++++++++++++++++
4 files changed, 58 insertions(+), 15 deletions(-)
diff --git a/builtin/grep.c b/builtin/grep.c
index b194ea3..d0a73da 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -724,11 +724,15 @@ static int file_callback(const struct option *opt, const char *arg, int unset)
if (!patterns)
die_errno("cannot open '%s'", arg);
while (strbuf_getline(&sb, patterns, '\n') == 0) {
+ char *s;
+ size_t len;
+
/* ignore empty line like grep does */
if (sb.len == 0)
continue;
- append_grep_pattern(grep_opt, strbuf_detach(&sb, NULL), arg,
- ++lno, GREP_PATTERN);
+
+ s = strbuf_detach(&sb, &len);
+ append_grep_pat(grep_opt, s, len, arg, ++lno, GREP_PATTERN);
}
fclose(patterns);
strbuf_release(&sb);
diff --git a/grep.c b/grep.c
index 70a776f..82fb349 100644
--- a/grep.c
+++ b/grep.c
@@ -7,6 +7,7 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
{
struct grep_pat *p = xcalloc(1, sizeof(*p));
p->pattern = pat;
+ p->patternlen = strlen(pat);
p->origin = "header";
p->no = 0;
p->token = GREP_PATTERN_HEAD;
@@ -19,8 +20,15 @@ void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field fie
void append_grep_pattern(struct grep_opt *opt, const char *pat,
const char *origin, int no, enum grep_pat_token t)
{
+ append_grep_pat(opt, pat, strlen(pat), origin, no, t);
+}
+
+void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
+ const char *origin, int no, enum grep_pat_token t)
+{
struct grep_pat *p = xcalloc(1, sizeof(*p));
p->pattern = pat;
+ p->patternlen = patlen;
p->origin = origin;
p->no = no;
p->token = t;
@@ -44,8 +52,8 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
append_header_grep_pattern(ret, pat->field,
pat->pattern);
else
- append_grep_pattern(ret, pat->pattern, pat->origin,
- pat->no, pat->token);
+ append_grep_pat(ret, pat->pattern, pat->patternlen,
+ pat->origin, pat->no, pat->token);
}
return ret;
@@ -329,21 +337,21 @@ static void show_name(struct grep_opt *opt, const char *name)
opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
}
-static int fixmatch(const char *pattern, char *line, char *eol,
- int ignore_case, regmatch_t *match)
+static int fixmatch(struct grep_pat *p, char *line, char *eol,
+ regmatch_t *match)
{
char *hit;
- if (ignore_case) {
+ if (p->ignore_case) {
char *s = line;
do {
- hit = strcasestr(s, pattern);
+ hit = strcasestr(s, p->pattern);
if (hit)
break;
s += strlen(s) + 1;
} while (s < eol);
} else
- hit = memmem(line, eol - line, pattern, strlen(pattern));
+ hit = memmem(line, eol - line, p->pattern, p->patternlen);
if (!hit) {
match->rm_so = match->rm_eo = -1;
@@ -351,7 +359,7 @@ static int fixmatch(const char *pattern, char *line, char *eol,
}
else {
match->rm_so = hit - line;
- match->rm_eo = match->rm_so + strlen(pattern);
+ match->rm_eo = match->rm_so + p->patternlen;
return 0;
}
}
@@ -417,7 +425,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
again:
if (p->fixed)
- hit = !fixmatch(p->pattern, bol, eol, p->ignore_case, pmatch);
+ hit = !fixmatch(p, bol, eol, pmatch);
else
hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
@@ -743,10 +751,9 @@ static int look_ahead(struct grep_opt *opt,
int hit;
regmatch_t m;
- if (p->fixed) {
- hit = !fixmatch(p->pattern, bol, bol + *left_p,
- p->ignore_case, &m);
- } else
+ if (p->fixed)
+ hit = !fixmatch(p, bol, bol + *left_p, &m);
+ else
hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
continue;
diff --git a/grep.h b/grep.h
index 89342e5..0aebebd 100644
--- a/grep.h
+++ b/grep.h
@@ -29,6 +29,7 @@ struct grep_pat {
int no;
enum grep_pat_token token;
const char *pattern;
+ size_t patternlen;
enum grep_header_field field;
regex_t regexp;
unsigned fixed:1;
@@ -104,6 +105,7 @@ struct grep_opt {
void *output_priv;
};
+extern void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, const char *origin, int no, enum grep_pat_token t);
extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
extern void append_header_grep_pattern(struct grep_opt *, enum grep_header_field, const char *);
extern void compile_grep_patterns(struct grep_opt *opt);
diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh
index 4f5e74f..eb8ca88 100755
--- a/t/t7008-grep-binary.sh
+++ b/t/t7008-grep-binary.sh
@@ -69,4 +69,34 @@ test_expect_failure 'git grep .fi a' '
git grep .fi a
'
+test_expect_success 'git grep -F y<NUL>f a' "
+ printf 'y\000f' >f &&
+ git grep -f f -F a
+"
+
+test_expect_success 'git grep -F y<NUL>x a' "
+ printf 'y\000x' >f &&
+ test_must_fail git grep -f f -F a
+"
+
+test_expect_success 'git grep -Fi Y<NUL>f a' "
+ printf 'Y\000f' >f &&
+ git grep -f f -Fi a
+"
+
+test_expect_failure 'git grep -Fi Y<NUL>x a' "
+ printf 'Y\000x' >f &&
+ test_must_fail git grep -f f -Fi a
+"
+
+test_expect_success 'git grep y<NUL>f a' "
+ printf 'y\000f' >f &&
+ git grep -f f a
+"
+
+test_expect_failure 'git grep y<NUL>x a' "
+ printf 'y\000x' >f &&
+ test_must_fail git grep -f f a
+"
+
test_done
--
1.7.1
prev parent reply other threads:[~2010-05-22 21:44 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-19 14:33 What's cooking extra Junio C Hamano
2010-05-19 15:12 ` A Large Angry SCM
2010-05-19 17:06 ` Finn Arne Gangstad
2010-05-19 20:09 ` Eyvind Bernhardsen
2010-05-22 13:09 ` Clemens Buchacher
2010-05-22 19:42 ` Eyvind Bernhardsen
2010-05-22 22:27 ` Clemens Buchacher
2010-05-23 10:36 ` Eyvind Bernhardsen
2010-05-23 11:51 ` Clemens Buchacher
2010-05-23 12:53 ` Eyvind Bernhardsen
2010-05-23 13:26 ` Ævar Arnfjörð Bjarmason
2010-05-24 9:49 ` Clemens Buchacher
2010-05-24 12:47 ` Dmitry Potapov
2010-05-24 20:45 ` Eyvind Bernhardsen
2010-05-24 20:56 ` Clemens Buchacher
2010-05-24 21:09 ` Eyvind Bernhardsen
2010-05-24 21:11 ` Eyvind Bernhardsen
2010-05-24 22:11 ` Clemens Buchacher
2010-05-25 6:41 ` Eyvind Bernhardsen
2010-05-25 8:27 ` Anthony Youngman
2010-06-07 19:55 ` Eyvind Bernhardsen
2010-05-25 8:33 ` Clemens Buchacher
2010-05-24 12:12 ` Dmitry Potapov
2010-05-24 12:22 ` Erik Faye-Lund
2010-05-24 12:42 ` Dmitry Potapov
2010-05-21 16:16 ` Ævar Arnfjörð Bjarmason
2010-05-22 21:24 ` René Scharfe
2010-05-22 21:26 ` [PATCH 1/8] grep: add test script for binary file handling René Scharfe
2010-05-22 21:28 ` [PATCH 2/8] grep: grep: refactor handling of binary mode options René Scharfe
2010-05-22 21:29 ` [PATCH 3/8] grep: --count over binary René Scharfe
2010-05-22 21:30 ` [PATCH 4/8] grep: --name-only " René Scharfe
2010-05-22 21:32 ` [PATCH 5/8] grep: use memmem() for fixed string search René Scharfe
2010-05-22 21:34 ` [PATCH 6/8] grep: continue case insensitive fixed string search after NUL chars René Scharfe
2010-05-22 21:35 ` [PATCH 7/8] grep: use REG_STARTEND for all matching if available René Scharfe
2010-05-22 21:43 ` René Scharfe [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4BF8500F.70205@lsrfire.ath.cx \
--to=rene.scharfe@lsrfire.ath.cx \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.