From: Michael J Gruber <git@drmicha.warpmail.net>
To: git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>, Jeff King <peff@peff.net>
Subject: [RFC/PATCH 3/4] grep: allow to use textconv filters
Date: Wed, 6 Feb 2013 16:08:52 +0100 [thread overview]
Message-ID: <da8c01b918c94c84ab61859b1b1453885bff5b06.1360162813.git.git@drmicha.warpmail.net> (raw)
In-Reply-To: <20130205201106.GA29248@sigill.intra.peff.net>
In-Reply-To: <cover.1360162813.git.git@drmicha.warpmail.net>
From: Jeff King <peff@peff.net>
Recently and not so recently, we made sure that log/grep type operations
use textconv filters when a user-facing diff would do the same:
ef90ab6 (pickaxe: use textconv for -S counting, 2012-10-28)
b1c2f57 (diff_grep: use textconv buffers for add/deleted files, 2012-10-28)
0508fe5 (combine-diff: respect textconv attributes, 2011-05-23)
"git grep" currently does not use textconv filters at all, that is
neither for displaying the match and context nor for the actual grepping.
Introduce an option "--textconv" which makes git grep use any configured
textconv filters for grepping and output purposes. It is off by default.
Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---
builtin/grep.c | 2 ++
grep.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++--------
grep.h | 1 +
3 files changed, 89 insertions(+), 14 deletions(-)
diff --git a/builtin/grep.c b/builtin/grep.c
index 8025964..915c8ef 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -659,6 +659,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
OPT_SET_INT('I', NULL, &opt.binary,
N_("don't match patterns in binary files"),
GREP_BINARY_NOMATCH),
+ OPT_BOOL(0, "textconv", &opt.allow_textconv,
+ N_("process binary files with textconv filters")),
{ OPTION_INTEGER, 0, "max-depth", &opt.max_depth, N_("depth"),
N_("descend at most <depth> levels"), PARSE_OPT_NONEG,
NULL, 1 },
diff --git a/grep.c b/grep.c
index 4bd1b8b..3880d64 100644
--- a/grep.c
+++ b/grep.c
@@ -2,6 +2,8 @@
#include "grep.h"
#include "userdiff.h"
#include "xdiff-interface.h"
+#include "diff.h"
+#include "diffcore.h"
static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs);
@@ -1321,6 +1323,58 @@ static void std_output(struct grep_opt *opt, const void *buf, size_t size)
fwrite(buf, size, 1, stdout);
}
+static int fill_textconv_grep(struct userdiff_driver *driver,
+ struct grep_source *gs)
+{
+ struct diff_filespec *df;
+ char *buf;
+ size_t size;
+
+ if (!driver || !driver->textconv)
+ return grep_source_load(gs);
+
+ /*
+ * The textconv interface is intimately tied to diff_filespecs, so we
+ * have to pretend to be one. If we could unify the grep_source
+ * and diff_filespec structs, this mess could just go away.
+ */
+ df = alloc_filespec(gs->path);
+ switch (gs->type) {
+ case GREP_SOURCE_SHA1:
+ fill_filespec(df, gs->identifier, 1, 0100644);
+ break;
+ case GREP_SOURCE_FILE:
+ fill_filespec(df, null_sha1, 0, 0100644);
+ break;
+ default:
+ die("BUG: attempt to textconv something without a path?");
+ }
+
+ /*
+ * fill_textconv is not remotely thread-safe; it may load objects
+ * behind the scenes, and it modifies the global diff tempfile
+ * structure.
+ */
+ grep_read_lock();
+ size = fill_textconv(driver, df, &buf);
+ grep_read_unlock();
+ free_filespec(df);
+
+ /*
+ * The normal fill_textconv usage by the diff machinery would just keep
+ * the textconv'd buf separate from the diff_filespec. But much of the
+ * grep code passes around a grep_source and assumes that its "buf"
+ * pointer is the beginning of the thing we are searching. So let's
+ * install our textconv'd version into the grep_source, taking care not
+ * to leak any existing buffer.
+ */
+ grep_source_clear_data(gs);
+ gs->buf = buf;
+ gs->size = size;
+
+ return 0;
+}
+
static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
{
char *bol;
@@ -1331,6 +1385,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
unsigned count = 0;
int try_lookahead = 0;
int show_function = 0;
+ struct userdiff_driver *textconv = NULL;
enum grep_context ctx = GREP_CONTEXT_HEAD;
xdemitconf_t xecfg;
@@ -1352,19 +1407,36 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
}
opt->last_shown = 0;
- switch (opt->binary) {
- case GREP_BINARY_DEFAULT:
- if (grep_source_is_binary(gs))
- binary_match_only = 1;
- break;
- case GREP_BINARY_NOMATCH:
- if (grep_source_is_binary(gs))
- return 0; /* Assume unmatch */
- break;
- case GREP_BINARY_TEXT:
- break;
- default:
- die("bug: unknown binary handling mode");
+ if (opt->allow_textconv) {
+ grep_source_load_driver(gs);
+ /*
+ * We might set up the shared textconv cache data here, which
+ * is not thread-safe.
+ */
+ grep_attr_lock();
+ textconv = userdiff_get_textconv(gs->driver);
+ grep_attr_unlock();
+ }
+
+ /*
+ * We know the result of a textconv is text, so we only have to care
+ * about binary handling if we are not using it.
+ */
+ if (!textconv) {
+ switch (opt->binary) {
+ case GREP_BINARY_DEFAULT:
+ if (grep_source_is_binary(gs))
+ binary_match_only = 1;
+ break;
+ case GREP_BINARY_NOMATCH:
+ if (grep_source_is_binary(gs))
+ return 0; /* Assume unmatch */
+ break;
+ case GREP_BINARY_TEXT:
+ break;
+ default:
+ die("bug: unknown binary handling mode");
+ }
}
memset(&xecfg, 0, sizeof(xecfg));
@@ -1372,7 +1444,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
try_lookahead = should_lookahead(opt);
- if (grep_source_load(gs) < 0)
+ if (fill_textconv_grep(textconv, gs) < 0)
return 0;
bol = gs->buf;
diff --git a/grep.h b/grep.h
index 8fc854f..94a7ac2 100644
--- a/grep.h
+++ b/grep.h
@@ -106,6 +106,7 @@ struct grep_opt {
#define GREP_BINARY_NOMATCH 1
#define GREP_BINARY_TEXT 2
int binary;
+ int allow_textconv;
int extended;
int use_reflog_filter;
int pcre;
--
1.8.1.2.752.g32d147e
next prev parent reply other threads:[~2013-02-06 15:09 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-02-04 15:27 [WIP/RFH/RFD/PATCH] grep: allow to use textconv filters Michael J Gruber
2013-02-04 17:12 ` Junio C Hamano
2013-02-05 8:48 ` Michael J Gruber
2013-02-05 11:13 ` Jeff King
2013-02-05 16:21 ` Michael J Gruber
2013-02-05 20:11 ` Jeff King
2013-02-06 15:08 ` [RFC/PATCH 0/4] textconv for show and grep Michael J Gruber
2013-02-06 15:08 ` [RFC/PATCH 1/4] show: obey --textconv for blobs Michael J Gruber
2013-02-06 16:53 ` Junio C Hamano
2013-02-06 22:12 ` Jeff King
2013-02-06 23:49 ` Junio C Hamano
2013-02-07 0:10 ` Jeff King
2013-02-07 0:26 ` Junio C Hamano
2013-02-07 8:48 ` Michael J Gruber
2013-02-06 22:06 ` Jeff King
2013-02-07 9:05 ` Michael J Gruber
2013-02-07 9:11 ` Jeff King
2013-02-07 9:34 ` Michael J Gruber
2013-02-07 9:43 ` Jeff King
2013-02-06 15:08 ` [RFC/PATCH 2/4] cat-file: do not die on --textconv without textconv filters Michael J Gruber
2013-02-06 16:47 ` Junio C Hamano
2013-02-06 22:19 ` Jeff King
2013-02-06 22:23 ` Junio C Hamano
2013-02-06 22:43 ` Jeff King
2013-02-06 15:08 ` Michael J Gruber [this message]
2013-02-06 15:12 ` [RFC/PATCH 3/4] grep: allow to use " Matthieu Moy
2013-02-06 22:23 ` Jeff King
2013-02-06 15:08 ` [RFC/PATCH 4/4] grep: obey --textconv for the case rev:path Michael J Gruber
2013-02-06 22:36 ` Jeff King
2013-02-07 9:05 ` Michael J Gruber
2013-02-07 9:26 ` Jeff King
2013-02-07 9:47 ` Michael J Gruber
2013-02-07 9:55 ` Jeff King
2013-02-07 10:31 ` Michael J Gruber
2013-02-07 18:03 ` Junio C Hamano
2013-02-08 11:27 ` Michael J Gruber
2013-02-06 16:55 ` [RFC/PATCH 0/4] textconv for show and grep Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=da8c01b918c94c84ab61859b1b1453885bff5b06.1360162813.git.git@drmicha.warpmail.net \
--to=git@drmicha.warpmail.net \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).