* [PATCH] built-in "git grep" (git grip).
@ 2006-05-01 6:32 Junio C Hamano
2006-05-01 6:56 ` Jakub Narebski
` (2 more replies)
0 siblings, 3 replies; 21+ messages in thread
From: Junio C Hamano @ 2006-05-01 6:32 UTC (permalink / raw)
To: git
This attempts to set up built-in "git grep" to further reduce
our dependence on the shell, while at the same time optionally
allowing to run grep against object database. You could do
funky things like these:
git grep --cached -e pattern ;# grep from index
git grep -e pattern master ;# or in a rev
git grep -e pattern master next ;# or in multiple revs
git grep -e pattern pu^@ ;# even like this with an
;# extension from another topic ;-)
git grep -e pattern master..next ;# or even from rev ranges
git grep -e pattern master~20:Documentation
;# or an arbitrary tree
git grep -e pattern next:git-commit.sh
;# or an arbitrary blob
Right now, it does not understand and/or obey many options grep
should accept, and the pattern matcher using POSIX.2 regex seems
to be excruciatingly slow (I lifted it from Pasky's regexp
pickaxe code almost verbatim without thinking -- I was too
tired). Help to improve things in the grep_buffer() function is
very much appreciated.
But this is going in the right direction. The shell script
version is one of the worst Portability offender in the git
barebone Porcelainish; it uses xargs -0 to pass paths around and
shell arrays to sift flags and parameters.
In order to stay out of the way of real work people want to get
done with the real "git grep", for now this implementation is
called "git grip".
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
Makefile | 2
builtin-grep.c | 377 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
builtin.h | 1
git.c | 1
4 files changed, 380 insertions(+), 1 deletions(-)
diff --git a/Makefile b/Makefile
index 8ce27a6..8d5122b 100644
--- a/Makefile
+++ b/Makefile
@@ -214,7 +214,7 @@ LIB_OBJS = \
$(DIFF_OBJS)
BUILTIN_OBJS = \
- builtin-log.o builtin-help.o
+ builtin-log.o builtin-help.o builtin-grep.o
GITLIBS = $(LIB_FILE) $(XDIFF_LIB)
LIBS = $(GITLIBS) -lz
diff --git a/builtin-grep.c b/builtin-grep.c
new file mode 100644
index 0000000..adcdbaa
--- /dev/null
+++ b/builtin-grep.c
@@ -0,0 +1,377 @@
+/*
+ * Builtin "git grep"
+ *
+ * Copyright (c) 2006 Junio C Hamano
+ */
+#include "cache.h"
+#include "blob.h"
+#include "tree.h"
+#include "commit.h"
+#include "tag.h"
+#include "diff.h"
+#include "revision.h"
+#include "builtin.h"
+#include <regex.h>
+
+struct grep_opt {
+ const char *pattern;
+ regex_t regexp;
+ unsigned linenum:1;
+ unsigned pre_context;
+ unsigned post_context;
+};
+
+static char *end_of_line(char *cp, unsigned long *left)
+{
+ unsigned long l = *left;
+ while (l && *cp != '\n') {
+ l--;
+ cp++;
+ }
+ *left = l;
+ return cp;
+}
+
+static int grep_buffer(struct grep_opt *opt, const char *name,
+ char *buf, unsigned long size)
+{
+ char *bol = buf;
+ unsigned long left = size;
+ unsigned lno = 1;
+ int hit = 0;
+
+ while (left) {
+ regmatch_t pmatch[10];
+ int flags = 0;
+ char *eol, *cp, ch;
+ eol = end_of_line(bol, &left);
+ ch = *eol;
+ *eol = 0;
+ for (cp = bol; cp < eol; cp++) {
+ int status = regexec(&opt->regexp, cp,
+ ARRAY_SIZE(pmatch), pmatch,
+ flags);
+ if (status == REG_NOMATCH)
+ flags |= REG_NOTBOL;
+ else if (status == 0) {
+ /* Hit at this line */
+ printf("%s:", name);
+ if (opt->linenum)
+ printf("%d:", lno);
+ printf("%.*s\n", eol-bol, bol);
+ hit = 1;
+ break;
+ }
+ }
+ *eol = ch;
+ bol = eol + 1;
+ left--;
+ lno++;
+ }
+ return hit;
+}
+
+static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name)
+{
+ unsigned long size;
+ char *data;
+ char type[20];
+ int hit;
+ data = read_sha1_file(sha1, type, &size);
+ if (!data) {
+ error("'%s': unable to read %s", name, sha1_to_hex(sha1));
+ return 0;
+ }
+ hit = grep_buffer(opt, name, data, size);
+ free(data);
+ return hit;
+}
+
+static int grep_file(struct grep_opt *opt, const char *filename)
+{
+ struct stat st;
+ int i;
+ char *data;
+ if (lstat(filename, &st) < 0) {
+ err_ret:
+ if (errno != ENOENT)
+ error("'%s': %s", filename, strerror(errno));
+ return 0;
+ }
+ if (!st.st_size)
+ return 0; /* empty file -- no grep hit */
+ if (!S_ISREG(st.st_mode))
+ return 0;
+ i = open(filename, O_RDONLY);
+ if (i < 0)
+ goto err_ret;
+ data = xmalloc(st.st_size + 1);
+ if (st.st_size != xread(i, data, st.st_size)) {
+ error("'%s': short read %s", filename, strerror(errno));
+ close(i);
+ free(data);
+ return 0;
+ }
+ close(i);
+ i = grep_buffer(opt, filename, data, st.st_size);
+ free(data);
+ return i;
+}
+
+static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached)
+{
+ int hit = 0;
+ int nr;
+ read_cache();
+
+ for (nr = 0; nr < active_nr; nr++) {
+ struct cache_entry *ce = active_cache[nr];
+ if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode)))
+ continue;
+ if (revs->diffopt.nr_paths) {
+ int i;
+ int namelen = ce_namelen(ce);
+ const char *name = ce->name;
+ for (i = 0; i < revs->diffopt.nr_paths; i++) {
+ const char *match = revs->diffopt.paths[i];
+ int matchlen = revs->diffopt.pathlens[i];
+ if (matchlen <= namelen)
+ if (!strncmp(name, match, matchlen))
+ break;
+ }
+ if (revs->diffopt.nr_paths <= i)
+ continue;
+ }
+ if (cached)
+ hit |= grep_sha1(opt, ce->sha1, ce->name);
+ else
+ hit |= grep_file(opt, ce->name);
+ }
+ return hit;
+}
+
+static int pathspec_matches(struct diff_options *opt, const char *name)
+{
+ int i;
+ int namelen;
+ if (!opt->nr_paths)
+ return 1;
+ namelen = strlen(name);
+ for (i = 0; i < opt->nr_paths; i++) {
+ const char *match = opt->paths[i];
+ int matchlen = opt->pathlens[i];
+ if (matchlen <= namelen)
+ if (!strncmp(name, match, matchlen))
+ return 1;
+ }
+ return 0;
+}
+
+static int grep_tree(struct grep_opt *opt, struct rev_info *revs,
+ struct tree_desc *tree,
+ const char *tree_name, const char *base)
+{
+ unsigned mode;
+ int len;
+ int hit = 0;
+ const char *path;
+ const unsigned char *sha1;
+ char *down_base;
+ char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100);
+
+ if (tree_name[0]) {
+ int offset = sprintf(path_buf, "%s:", tree_name);
+ down_base = path_buf + offset;
+ strcat(down_base, base);
+ }
+ else {
+ down_base = path_buf;
+ strcpy(down_base, base);
+ }
+ len = strlen(path_buf);
+
+ while (tree->size) {
+ int pathlen;
+ sha1 = tree_entry_extract(tree, &path, &mode);
+ pathlen = strlen(path);
+ strcpy(path_buf + len, path);
+
+ if (!pathspec_matches(&revs->diffopt, down_base))
+ ;
+ else if (S_ISREG(mode))
+ hit |= grep_sha1(opt, sha1, path_buf);
+ else if (S_ISDIR(mode)) {
+ char type[20];
+ struct tree_desc sub;
+ void *data;
+ data = read_sha1_file(sha1, type, &sub.size);
+ if (!data)
+ die("unable to read tree (%s)",
+ sha1_to_hex(sha1));
+ strcpy(path_buf + len + pathlen, "/");
+ sub.buf = data;
+ hit = grep_tree(opt, revs, &sub, tree_name, down_base);
+ free(data);
+ }
+ update_tree_entry(tree);
+ }
+ return hit;
+}
+
+static int grep_object(struct grep_opt *opt, struct rev_info *revs,
+ struct object *obj, const char *name)
+{
+ if (!strcmp(obj->type, blob_type))
+ return grep_sha1(opt, obj->sha1, name);
+ if (!strcmp(obj->type, commit_type) ||
+ !strcmp(obj->type, tree_type)) {
+ struct tree_desc tree;
+ void *data;
+ int hit;
+ data = read_object_with_reference(obj->sha1, tree_type,
+ &tree.size, NULL);
+ if (!data)
+ die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
+ tree.buf = data;
+ hit = grep_tree(opt, revs, &tree, name, "");
+ free(data);
+ return hit;
+ }
+ die("unable to grep from object of type %s", obj->type);
+}
+
+static const char builtin_grep_usage[] =
+"git-grep <option>* <rev>* [-e] <pattern> [<path>...]";
+
+int cmd_grep(int argc, const char **argv, char **envp)
+{
+ struct rev_info rev;
+ const char **dst, **src;
+ int err;
+ int hit = 0;
+ int no_more_arg = 0;
+ int seen_range = 0;
+ int seen_noncommit = 0;
+ int cached = 0;
+ struct grep_opt opt;
+ struct object_list *list;
+
+ memset(&opt, 0, sizeof(opt));
+
+ /*
+ * Interpret and remove the grep options upfront. Sigh...
+ */
+ for (dst = src = &argv[1]; src < argc + argv; ) {
+ const char *arg = *src++;
+ if (!no_more_arg) {
+ if (!strcmp("--", arg)) {
+ no_more_arg = 1;
+ *dst++ = arg;
+ continue;
+ }
+ if (!strcmp("--cached", arg)) {
+ cached = 1;
+ continue;
+ }
+ if (!strcmp("-e", arg)) {
+ if (src < argc + argv) {
+ opt.pattern = *src++;
+ continue;
+ }
+ usage(builtin_grep_usage);
+ }
+ if (!strcmp("-n", arg)) {
+ opt.linenum = 1;
+ continue;
+ }
+ if (!strcmp("-H", arg)) {
+ /* We always show the pathname, so this
+ * is a noop.
+ */
+ continue;
+ }
+ if (!strcmp("-A", arg) ||
+ !strcmp("-B", arg) ||
+ !strcmp("-C", arg)) {
+ unsigned num;
+ if ((argc + argv <= src) ||
+ sscanf(*src++, "%u", &num) != 1)
+ usage(builtin_grep_usage);
+ switch (arg[1]) {
+ case 'A':
+ opt.post_context = num;
+ break;
+ case 'C':
+ opt.post_context = num;
+ case 'B':
+ opt.pre_context = num;
+ break;
+ }
+ continue;
+ }
+ }
+ *dst++ = arg;
+ }
+ if (!opt.pattern)
+ die("no pattern given.");
+
+ err = regcomp(&opt.regexp, opt.pattern, REG_NEWLINE);
+ if (err) {
+ char errbuf[1024];
+ regerror(err, &opt.regexp, errbuf, 1024);
+ regfree(&opt.regexp);
+ die("'%s': %s", opt.pattern, errbuf);
+ }
+
+ init_revisions(&rev);
+ *dst = NULL;
+ argc = setup_revisions(dst - argv, argv, &rev, NULL);
+
+ /*
+ * Do not walk "grep -e foo master next pu -- Documentation/"
+ * but do walk "grep -e foo master..next -- Documentation/".
+ * Ranged request mixed with a blob or tree object, like
+ * "grep -e foo v1.0.0:Documentation/ master..next"
+ * so detect that and complain.
+ */
+ for (list = rev.pending_objects; list; list = list->next) {
+ struct object *real_obj;
+ if (list->item->flags & UNINTERESTING)
+ seen_range = 1;
+ real_obj = deref_tag(list->item, NULL, 0);
+ if (strcmp(real_obj->type, commit_type))
+ seen_noncommit = 1;
+ }
+ if (!rev.pending_objects)
+ return !grep_cache(&opt, &rev, cached);
+ if (cached)
+ die("both --cached and revisions given.");
+
+ if (seen_range && seen_noncommit)
+ die("both A..B and non commit are given.");
+ if (seen_range) {
+ struct commit *commit;
+ prepare_revision_walk(&rev);
+ while ((commit = get_revision(&rev)) != NULL) {
+ unsigned char *sha1 = commit->object.sha1;
+ char *n = find_unique_abbrev(sha1, DEFAULT_ABBREV);
+ char rev_name[41];
+ strcpy(rev_name, n);
+ if (grep_object(&opt, &rev, &commit->object, rev_name))
+ hit = 1;
+ commit->buffer = NULL;
+ }
+ return !hit;
+ }
+
+ /* all of them are non-commit; do not walk, and
+ * do not lose their names.
+ */
+ for (list = rev.pending_objects; list; list = list->next) {
+ struct object *real_obj;
+ real_obj = deref_tag(list->item, NULL, 0);
+ if (grep_object(&opt, &rev, real_obj, list->name))
+ hit = 1;
+ }
+ return !hit;
+}
diff --git a/builtin.h b/builtin.h
index 47408a0..cf5de3b 100644
--- a/builtin.h
+++ b/builtin.h
@@ -19,5 +19,6 @@ extern int cmd_version(int argc, const c
extern int cmd_whatchanged(int argc, const char **argv, char **envp);
extern int cmd_show(int argc, const char **argv, char **envp);
extern int cmd_log(int argc, const char **argv, char **envp);
+extern int cmd_grep(int argc, const char **argv, char **envp);
#endif
diff --git a/git.c b/git.c
index 01b7e28..18e857d 100644
--- a/git.c
+++ b/git.c
@@ -46,6 +46,7 @@ static void handle_internal_command(int
{ "log", cmd_log },
{ "whatchanged", cmd_whatchanged },
{ "show", cmd_show },
+ { "grip", cmd_grep },
};
int i;
--
1.3.1.gd233
^ permalink raw reply related [flat|nested] 21+ messages in thread* Re: [PATCH] built-in "git grep" (git grip). 2006-05-01 6:32 [PATCH] built-in "git grep" (git grip) Junio C Hamano @ 2006-05-01 6:56 ` Jakub Narebski 2006-05-01 6:59 ` Junio C Hamano 2006-05-01 7:30 ` [PATCH] built-in "git grep" (git grip) - quickfix Junio C Hamano 2006-05-01 14:04 ` [PATCH] built-in "git grep" (git grip) Sam Ravnborg 2 siblings, 1 reply; 21+ messages in thread From: Jakub Narebski @ 2006-05-01 6:56 UTC (permalink / raw) To: git Junio C Hamano wrote: > This attempts to set up built-in "git grep" to further reduce > our dependence on the shell, while at the same time optionally > allowing to run grep against object database. [...] > In order to stay out of the way of real work people want to get > done with the real "git grep", for now this implementation is > called "git grip". Wouldn't "git ggrep" (from git-aware grep) or "git bgrep" (from built-in grep), similar to the egrep and fgrep from the grep package? -- Jakub Narebski Warsaw, Poland ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-01 6:56 ` Jakub Narebski @ 2006-05-01 6:59 ` Junio C Hamano 2006-05-01 7:12 ` Jakub Narebski 0 siblings, 1 reply; 21+ messages in thread From: Junio C Hamano @ 2006-05-01 6:59 UTC (permalink / raw) To: Jakub Narebski; +Cc: git Jakub Narebski <jnareb@gmail.com> writes: > Wouldn't "git ggrep" (from git-aware grep) or "git bgrep" (from built-in > grep), similar to the egrep and fgrep from the grep package? The eventual goal is to rename it to "git grep" and remove the shell based one, so the interim name does not matter. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-01 6:59 ` Junio C Hamano @ 2006-05-01 7:12 ` Jakub Narebski 2006-05-02 8:33 ` Andreas Ericsson 0 siblings, 1 reply; 21+ messages in thread From: Jakub Narebski @ 2006-05-01 7:12 UTC (permalink / raw) To: git Junio C Hamano wrote: Jakub Narebski <jnareb@gmail.com> writes: > Wouldn't "git ggrep" (from git-aware grep) or "git bgrep" (from built-in > grep), similar to the egrep and fgrep from the grep package? Yes, I understand, but I just don't like using 'grip'. And it would be nice to have some convention for further not-ready-yet built-in replacements for script versions of commands, for example adding letter 'b' as 'built-in' at the beginning of command name: 'bgrep', 'bdiff'. Or use postfix 'n' or '-ng' to denote transitionary not-ready-yet new version of command: 'grepn', 'diffn' or 'grep-ng', 'diff-ng'. By the way, [my] grep is linked against libpcre - would it mean that git would also need to use pcre library, or at least have an option to use it? I also wonder if anyone would be interested to _force_ using external grep (probably enhanced)... just a thought. -- Jakub Narebski Warsaw, Poland ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-01 7:12 ` Jakub Narebski @ 2006-05-02 8:33 ` Andreas Ericsson 2006-05-02 8:44 ` Jakub Narebski 2006-05-02 9:01 ` Junio C Hamano 0 siblings, 2 replies; 21+ messages in thread From: Andreas Ericsson @ 2006-05-02 8:33 UTC (permalink / raw) To: Jakub Narebski; +Cc: git Jakub Narebski wrote: > Junio C Hamano wrote: > > Jakub Narebski <jnareb@gmail.com> writes: > > >>Wouldn't "git ggrep" (from git-aware grep) or "git bgrep" (from built-in >>grep), similar to the egrep and fgrep from the grep package? > > > Yes, I understand, but I just don't like using 'grip'. And it would be nice > to have some convention for further not-ready-yet built-in replacements for > script versions of commands, for example adding letter 'b' as 'built-in' at > the beginning of command name: 'bgrep', 'bdiff'. Or use postfix 'n' or > '-ng' to denote transitionary not-ready-yet new version of command: > 'grepn', 'diffn' or 'grep-ng', 'diff-ng'. > Forcing the user to remember what's implemented as built-ins is not a good idea. It was for that exact reason the "git-<command>-script" were all renamed "git-<command>" once upon a time. "git grip" work just fine for me, since it's only intended for testing and performance improvements so far. I also think it's clearer for end-users looking for a grep command if they're not faced with fgrep/egrep/ggrep/bgrep alongside plain "grep". -- Andreas Ericsson andreas.ericsson@op5.se OP5 AB www.op5.se Tel: +46 8-230225 Fax: +46 8-230231 ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-02 8:33 ` Andreas Ericsson @ 2006-05-02 8:44 ` Jakub Narebski 2006-05-02 9:01 ` Junio C Hamano 1 sibling, 0 replies; 21+ messages in thread From: Jakub Narebski @ 2006-05-02 8:44 UTC (permalink / raw) To: git Andreas Ericsson wrote: > Jakub Narebski wrote: >> Yes, I understand, but I just don't like using 'grip'. And it would be >> nice to have some convention for further not-ready-yet built-in >> replacements for script versions of commands, for example adding letter >> 'b' as 'built-in' at the beginning of command name: 'bgrep', 'bdiff'. Or >> use postfix 'n' or '-ng' to denote transitionary not-ready-yet new >> version of command: 'grepn', 'diffn' or 'grep-ng', 'diff-ng'. >> > > Forcing the user to remember what's implemented as built-ins is not a > good idea. It was for that exact reason the "git-<command>-script" were > all renamed "git-<command>" once upon a time. > > "git grip" work just fine for me, since it's only intended for testing > and performance improvements so far. I also think it's clearer for > end-users looking for a grep command if they're not faced with > fgrep/egrep/ggrep/bgrep alongside plain "grep". Well, scratch 'bgrep' idea, even if I had no intend for 'bgrep' to be persistent name; it was meant as transitionary name. Well, that doesn't matter much because someone interested in testing new, not-ready-yet versions of commands (I like 'grepn' idea) usually would follow git development, and would know (or not) about new version of 'git grep' being 'git grip' (and not 'git grepn'). What about forcing using external grep, and the fact that grep is linked with libpcre? -- Jakub Narebski Warsaw, Poland ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-02 8:33 ` Andreas Ericsson 2006-05-02 8:44 ` Jakub Narebski @ 2006-05-02 9:01 ` Junio C Hamano 2006-05-02 9:25 ` Jakub Narebski 1 sibling, 1 reply; 21+ messages in thread From: Junio C Hamano @ 2006-05-02 9:01 UTC (permalink / raw) To: Andreas Ericsson; +Cc: git Andreas Ericsson <ae@op5.se> writes: > "git grip" work just fine for me, since it's only intended for testing > and performance improvements so far. I also think it's clearer for > end-users looking for a grep command if they're not faced with > fgrep/egrep/ggrep/bgrep alongside plain "grep". I renamed "git grip" to "git grep" and "git diffn" to "git diff" both in "next" branch to avoid confusion. Thanks Andreas, Jakub, and others for input. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-02 9:01 ` Junio C Hamano @ 2006-05-02 9:25 ` Jakub Narebski 2006-05-02 9:39 ` Andreas Ericsson 2006-05-02 19:07 ` Junio C Hamano 0 siblings, 2 replies; 21+ messages in thread From: Jakub Narebski @ 2006-05-02 9:25 UTC (permalink / raw) To: git Junio C Hamano wrote: > Andreas Ericsson <ae@op5.se> writes: > >> "git grip" work just fine for me, since it's only intended for testing >> and performance improvements so far. I also think it's clearer for >> end-users looking for a grep command if they're not faced with >> fgrep/egrep/ggrep/bgrep alongside plain "grep". > > I renamed "git grip" to "git grep" and "git diffn" to "git diff" > both in "next" branch to avoid confusion. Thanks Andreas, > Jakub, and others for input. So, is there a way to use old, script version of those commands? -- Jakub Narebski Warsaw, Poland ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-02 9:25 ` Jakub Narebski @ 2006-05-02 9:39 ` Andreas Ericsson 2006-05-02 19:07 ` Junio C Hamano 1 sibling, 0 replies; 21+ messages in thread From: Andreas Ericsson @ 2006-05-02 9:39 UTC (permalink / raw) To: Jakub Narebski; +Cc: git Jakub Narebski wrote: > Junio C Hamano wrote: > > >>Andreas Ericsson <ae@op5.se> writes: >> >> >>>"git grip" work just fine for me, since it's only intended for testing >>>and performance improvements so far. I also think it's clearer for >>>end-users looking for a grep command if they're not faced with >>>fgrep/egrep/ggrep/bgrep alongside plain "grep". >> >>I renamed "git grip" to "git grep" and "git diffn" to "git diff" >>both in "next" branch to avoid confusion. Thanks Andreas, >>Jakub, and others for input. > > > So, is there a way to use old, script version of those commands? > Use "master" branch or "git-grep" syntax if you're trying "next" branch. -- Andreas Ericsson andreas.ericsson@op5.se OP5 AB www.op5.se Tel: +46 8-230225 Fax: +46 8-230231 ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-02 9:25 ` Jakub Narebski 2006-05-02 9:39 ` Andreas Ericsson @ 2006-05-02 19:07 ` Junio C Hamano 2006-05-02 21:23 ` Linus Torvalds 1 sibling, 1 reply; 21+ messages in thread From: Junio C Hamano @ 2006-05-02 19:07 UTC (permalink / raw) To: Jakub Narebski; +Cc: git Jakub Narebski <jnareb@gmail.com> writes: >> I renamed "git grip" to "git grep" and "git diffn" to "git diff" >> both in "next" branch to avoid confusion. Thanks Andreas, >> Jakub, and others for input. > > So, is there a way to use old, script version of those commands? I'd say that is probably not the real question you wanted to ask, but let's pretend it is for a moment. The "master" branch has not been updated to remove the script one, so you can keep running "master" one (or 1.3.X series). Or you can fork your own private edition by tweaking git.c (prevent it from running the builtin one) and Makefile (resurrect the script based one and prevent it from installing git-grep hardlinked with git itself). One thing that I will not do in the long run, however, is to keep the script based one and have builtin one. It is like carrying all the earlier slightly incompatible versions as git-grep-1.1.sh, git-grep-1.2.sh, and git-grep-1.3.sh in the source for fear of backward compatibility problems -- it is crazy. So the real question, is what are still missing in the built-in implementation. What will we lose if we remove the script based one and replace it with today's built-in one, if we are ready to do it today, and if not what we are going to do about them. My answer to the latter questions are "not yet" (obviously, that is why "master" does not have it yet), and "will support what are reasonable". Here are main differences that I am aware of: - The shell-script one, if you use GNU grep, accepts more options to grep than what the current built-in one supports. Notable ones that are missing: fixed strings (-F), patterns from file (-f), count matches (-c), omit filenames (-h), skipping binary files (-I, -U), files without match (-L), pcre (-P), silent (-q), word expression (-w), NUL (-z). They should be easy to add if somebody cares enough, and I plan to do a few myself before pushing it out to "master". - The shell-script one can be coaxed to use different "grep" implementation from the standard one with an appropriate PATH settings. At the lowest level, buitlin-grep.c::grep_buffer() function is called with the set of parsed options, the "filename" used for reporting, and the text to grepped in-core. The shell-script one always worked on working tree files, but the built-in one can work on working tree files and also alternatively on files from other versions. Regardless of where the file comes from, this function is called to look for the pattern the user is looking for. You can do two things. One is to add support for commonly used but still missing features to built-in one. For this, you would need to extend "struct grep_opt" to hold new option parameters (e.g. if you want to do "-f", you would need to hold all patterns you obtain from the named file so grep_buffer() can use them -- currently it supports only one pattern), and teach grep_buffer() how to do the new feature. Another thing you can do is to detect GIT_EXTERNAL_GREP (in the same spirit as GIT_EXTERNAL_DIFF) environment variable at the front of grep_buffer(), and when it is set, spawn the named external program with the original parameters the user supplied, probably stashed away in "struct grep_opt" when cmd_grep() does its parameter parsing, and feed it the contents of the buffer. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-02 19:07 ` Junio C Hamano @ 2006-05-02 21:23 ` Linus Torvalds 2006-05-02 21:54 ` Junio C Hamano 0 siblings, 1 reply; 21+ messages in thread From: Linus Torvalds @ 2006-05-02 21:23 UTC (permalink / raw) To: Junio C Hamano; +Cc: Jakub Narebski, git On Tue, 2 May 2006, Junio C Hamano wrote: > > - The shell-script one, if you use GNU grep, accepts more > options to grep than what the current built-in one supports. > Notable ones that are missing: fixed strings (-F), patterns > from file (-f), count matches (-c), omit filenames (-h), > skipping binary files (-I, -U), files without match (-L), > pcre (-P), silent (-q), word expression (-w), NUL (-z). They > should be easy to add if somebody cares enough, and I plan to > do a few myself before pushing it out to "master". I use "-w" all the time, along with -5 or similar to get context for the grep. Linus ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-02 21:23 ` Linus Torvalds @ 2006-05-02 21:54 ` Junio C Hamano 2006-05-02 23:07 ` Linus Torvalds 2006-05-03 0:01 ` Junio C Hamano 0 siblings, 2 replies; 21+ messages in thread From: Junio C Hamano @ 2006-05-02 21:54 UTC (permalink / raw) To: Linus Torvalds; +Cc: git Linus Torvalds <torvalds@osdl.org> writes: > On Tue, 2 May 2006, Junio C Hamano wrote: >> >> - The shell-script one, if you use GNU grep, accepts more >> options to grep than what the current built-in one supports. >> Notable ones that are missing: fixed strings (-F), patterns >> from file (-f), count matches (-c), omit filenames (-h), >> skipping binary files (-I, -U), files without match (-L), >> pcre (-P), silent (-q), word expression (-w), NUL (-z). They >> should be easy to add if somebody cares enough, and I plan to >> do a few myself before pushing it out to "master". > > I use "-w" all the time, along with -5 or similar to get context for the > grep. Noted; -w is missing; -A/-B/-C are already there so you could say -C 5 instead, and -<n> should be easy to add. On a related tangent, ever since I started using the built-in grep with ls-files like wildcard, I find myself typing something like this by mistake (this is from my day-job work project that has src/mx.js and src/mxstyle.css among other things): git diff 268a94 -- 'src/mx*' I am tempted to suggest switching pathspecs used by diff and log family to do the same wildcarding, perhaps after tightening the wildcard vs directory prefix logic used in the builtin-grep of the current "next" tip, which is a bit looser than necessary. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-02 21:54 ` Junio C Hamano @ 2006-05-02 23:07 ` Linus Torvalds 2006-05-03 0:01 ` Junio C Hamano 1 sibling, 0 replies; 21+ messages in thread From: Linus Torvalds @ 2006-05-02 23:07 UTC (permalink / raw) To: Junio C Hamano; +Cc: git On Tue, 2 May 2006, Junio C Hamano wrote: > > On a related tangent, ever since I started using the built-in > grep with ls-files like wildcard, I find myself typing something > like this by mistake (this is from my day-job work project that > has src/mx.js and src/mxstyle.css among other things): > > git diff 268a94 -- 'src/mx*' > > I am tempted to suggest switching pathspecs used by diff and log > family to do the same wildcarding, perhaps after tightening the > wildcard vs directory prefix logic used in the builtin-grep of > the current "next" tip, which is a bit looser than necessary. Yeah, the wildcarding is nice. You need to be very careful about it, though, to make sure that you take full advantage of the path component optimizations _before_ the wildcards, so that when you do something like the above ('src/mx*'), you do the "src/" part with the tree-level optimizations, and only the latter part with the pattern matching (because you do _not_ want to expand the whole tree when you don't want to). That "ls-files.c" thing already does part of this (that whole "prefix_len" thing for the "longest common prefix"). Linus ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-02 21:54 ` Junio C Hamano 2006-05-02 23:07 ` Linus Torvalds @ 2006-05-03 0:01 ` Junio C Hamano 1 sibling, 0 replies; 21+ messages in thread From: Junio C Hamano @ 2006-05-03 0:01 UTC (permalink / raw) To: Linus Torvalds; +Cc: git Junio C Hamano <junkio@cox.net> writes: > Linus Torvalds <torvalds@osdl.org> writes: > >> On Tue, 2 May 2006, Junio C Hamano wrote: >>> >>> - The shell-script one, if you use GNU grep, accepts more >>> options to grep than what the current built-in one supports. >>> Notable ones that are missing: fixed strings (-F), patterns >>> from file (-f), count matches (-c), omit filenames (-h), >>> skipping binary files (-I, -U), files without match (-L), >>> pcre (-P), silent (-q), word expression (-w), NUL (-z). They >>> should be easy to add if somebody cares enough, and I plan to >>> do a few myself before pushing it out to "master". >> >> I use "-w" all the time, along with -5 or similar to get context for the >> grep. > > Noted; -w is missing; -A/-B/-C are already there so you could > say -C 5 instead, and -<n> should be easy to add. I did both -<n> and -w, and pushed it out in "next". What we have: -<n>, -[ABC] <n> (and -[ABC]<n>) -E -G -H (but it is an no-op -- we always show name) -c -e (you can do multiple patterns now) -i -n -v -w -l What are still missing: -I (easy) -L (probably a bit intrusive) -P (code is easy -- deciding dependency on pcre is OK is harder) -U (probably not so easy but may be useful) -Z (probably easy but is it useful?) -q (may not be worth doing) -z (easy but pointless) -F (dunno) -f (with the enhancement to do multiple -e, trivial to add this) ^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH] built-in "git grep" (git grip) - quickfix 2006-05-01 6:32 [PATCH] built-in "git grep" (git grip) Junio C Hamano 2006-05-01 6:56 ` Jakub Narebski @ 2006-05-01 7:30 ` Junio C Hamano 2006-05-01 14:04 ` [PATCH] built-in "git grep" (git grip) Sam Ravnborg 2 siblings, 0 replies; 21+ messages in thread From: Junio C Hamano @ 2006-05-01 7:30 UTC (permalink / raw) To: git Junio C Hamano <junkio@cox.net> writes: > Right now, it does not understand and/or obey many options grep > should accept, and the pattern matcher using POSIX.2 regex seems > to be excruciatingly slow... I forgot to say that unlike the shell script version you need to give -e in front of the pattern with this version because of the way the option parser is structured. Obviously this needs to be fixed for usability's sake. But I seem to have managed to fix the "excruciatingly slow" part trivially. regexec() is not re.match() but re.search(), and there is no point looking at each character on the line. Here is a patch. -- >8 -- diff --git a/builtin-grep.c b/builtin-grep.c index adcdbaa..6230f44 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -42,26 +42,18 @@ static int grep_buffer(struct grep_opt * while (left) { regmatch_t pmatch[10]; - int flags = 0; - char *eol, *cp, ch; + char *eol, ch; eol = end_of_line(bol, &left); ch = *eol; *eol = 0; - for (cp = bol; cp < eol; cp++) { - int status = regexec(&opt->regexp, cp, - ARRAY_SIZE(pmatch), pmatch, - flags); - if (status == REG_NOMATCH) - flags |= REG_NOTBOL; - else if (status == 0) { - /* Hit at this line */ - printf("%s:", name); - if (opt->linenum) - printf("%d:", lno); - printf("%.*s\n", eol-bol, bol); - hit = 1; - break; - } + if (!regexec(&opt->regexp, bol, + ARRAY_SIZE(pmatch), pmatch, 0)) { + /* Hit at this line */ + printf("%s:", name); + if (opt->linenum) + printf("%d:", lno); + printf("%.*s\n", eol-bol, bol); + hit = 1; } *eol = ch; bol = eol + 1; ^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-01 6:32 [PATCH] built-in "git grep" (git grip) Junio C Hamano 2006-05-01 6:56 ` Jakub Narebski 2006-05-01 7:30 ` [PATCH] built-in "git grep" (git grip) - quickfix Junio C Hamano @ 2006-05-01 14:04 ` Sam Ravnborg 2006-05-01 14:07 ` Sam Ravnborg 2 siblings, 1 reply; 21+ messages in thread From: Sam Ravnborg @ 2006-05-01 14:04 UTC (permalink / raw) To: Junio C Hamano; +Cc: git On Sun, Apr 30, 2006 at 11:32:36PM -0700, Junio C Hamano wrote: > This attempts to set up built-in "git grep" to further reduce > our dependence on the shell, while at the same time optionally > allowing to run grep against object database. You could do > funky things like these: > > git grep --cached -e pattern ;# grep from index > git grep -e pattern master ;# or in a rev > git grep -e pattern master next ;# or in multiple revs > git grep -e pattern pu^@ ;# even like this with an > ;# extension from another topic ;-) > git grep -e pattern master..next ;# or even from rev ranges > git grep -e pattern master~20:Documentation > ;# or an arbitrary tree > git grep -e pattern next:git-commit.sh > ;# or an arbitrary blob > A feature I have been missing often has been the possibility to limit grep (and ls-files) to certain filenames. Say: git grip -e DEBUG 'Kconfig*' I usually do something stupid like: git ls-files | grep Kconfig | xargs grep DEBUG Thought it may be trivial to extend git grip while you are there anyway. But obviously only if this is useful for more than just me. Sam ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-01 14:04 ` [PATCH] built-in "git grep" (git grip) Sam Ravnborg @ 2006-05-01 14:07 ` Sam Ravnborg 2006-05-01 14:53 ` Sam Ravnborg 0 siblings, 1 reply; 21+ messages in thread From: Sam Ravnborg @ 2006-05-01 14:07 UTC (permalink / raw) To: Junio C Hamano; +Cc: git > > I usually do something stupid like: > git ls-files | grep Kconfig | xargs grep DEBUG Which is indeed studip. I just learned I could say: git ls-files '*/Kconfig*' | xargs grep DEBUG Sam ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-01 14:07 ` Sam Ravnborg @ 2006-05-01 14:53 ` Sam Ravnborg 2006-05-01 15:48 ` Linus Torvalds 0 siblings, 1 reply; 21+ messages in thread From: Sam Ravnborg @ 2006-05-01 14:53 UTC (permalink / raw) To: Junio C Hamano; +Cc: git On Mon, May 01, 2006 at 04:07:04PM +0200, Sam Ravnborg wrote: > > > > I usually do something stupid like: > > git ls-files | grep Kconfig | xargs grep DEBUG > > Which is indeed studip. I just learned I could say: > git ls-files '*/Kconfig*' | xargs grep DEBUG Seems I have confused myself. git grep DEBUG '*/Kconfig*' does indeed work today. And browsing the git grip code that will also support it. Sorry for the noise. Sam ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] built-in "git grep" (git grip). 2006-05-01 14:53 ` Sam Ravnborg @ 2006-05-01 15:48 ` Linus Torvalds 2006-05-01 19:30 ` [PATCH] builtin-grep: wildcard pathspec fixes Junio C Hamano 0 siblings, 1 reply; 21+ messages in thread From: Linus Torvalds @ 2006-05-01 15:48 UTC (permalink / raw) To: Sam Ravnborg; +Cc: Junio C Hamano, git On Mon, 1 May 2006, Sam Ravnborg wrote: > > Seems I have confused myself. > > git grep DEBUG '*/Kconfig*' > > does indeed work today. Indeed. I was a bit confused about your report, since not only does it work today, that's how it has always worked, and it was very much designed that way. I use it all the time. It takes the git-ls-files pathname syntax, which is a bit _different_ from the normal "limit to these paths" syntax, in that it honors '*'. And it honors that a bit differently than normal shell pathname expansion, because for git-ls-files a '*' pattern will match '/' as well. So git grep pattern 'net/*.c' will match every single C file found _recursively_ inside the "net/" subdirectory, not just in that single directory itself. So "*" for git grep is a bit more like a "**" pattern in some shells. Linus ^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH] builtin-grep: wildcard pathspec fixes 2006-05-01 15:48 ` Linus Torvalds @ 2006-05-01 19:30 ` Junio C Hamano 2006-05-01 23:24 ` [PATCH] builtin-grep: do not use setup_revisions() Junio C Hamano 0 siblings, 1 reply; 21+ messages in thread From: Junio C Hamano @ 2006-05-01 19:30 UTC (permalink / raw) To: Linus Torvalds; +Cc: git This tweaks the pathspec wildcard used in builtin-grep to match that of ls-files. With this: git grep -e DEBUG -- '*/Kconfig*' would work like the shell script version, and you could even do: git grep -e DEBUG --cached -- '*/Kconfig*' ;# from index git grep -e DEBUG v2.6.12 -- '*/Kconfig*' ;# from rev Signed-off-by: Junio C Hamano <junkio@cox.net> --- * Still haven't improved the "-e" issue (and to a lesser extent I think requiring -- is not right in this context either), but builtin-grep.c | 85 +++++++++++++++++++++++++++++++++++++++++--------------- 1 files changed, 62 insertions(+), 23 deletions(-) diff --git a/builtin-grep.c b/builtin-grep.c index 36150bf..653b65e 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -12,33 +12,66 @@ #include "diff.h" #include "revision.h" #include "builtin.h" #include <regex.h> +#include <fnmatch.h> +/* + * git grep pathspecs are somewhat different from diff-tree pathspecs; + * pathname wildcards are allowed. + */ static int pathspec_matches(struct diff_options *opt, const char *name) { - int i, j; - int namelen; + int namelen, i; if (!opt->nr_paths) return 1; namelen = strlen(name); for (i = 0; i < opt->nr_paths; i++) { const char *match = opt->paths[i]; int matchlen = opt->pathlens[i]; - if (matchlen <= namelen) { - if (!strncmp(name, match, matchlen)) - return 1; + const char *slash, *cp; + + if ((matchlen <= namelen) && + !strncmp(name, match, matchlen) && + (match[matchlen-1] == '/' || + name[matchlen] == '\0' || name[matchlen] == '/')) + return 1; + if (!fnmatch(match, name, 0)) + return 1; + if (name[namelen-1] != '/') continue; - } - /* If name is "Documentation" and pathspec is - * "Documentation/", they should match. Maybe - * we would want to strip it in get_pathspec()??? + + /* We are being asked if the name directory is worth + * descending into. + * + * Find the longest leading directory name that does + * not have metacharacter in the pathspec; the name + * we are looking at must overlap with that directory. */ - if (strncmp(name, match, namelen)) - continue; - for (j = namelen; j < matchlen; j++) - if (match[j] != '/') + for (cp = match, slash = NULL; cp - match < matchlen; cp++) { + char ch = *cp; + if (ch == '/') + slash = cp; + if (ch == '*' || ch == '[') break; - if (matchlen <= j) - return 1; + } + if (!slash) + slash = match; /* toplevel */ + else + slash++; + if (namelen <= slash - match) { + /* Looking at "Documentation/" and + * the pattern says "Documentation/howto/", or + * "Documentation/diff*.txt". + */ + if (!memcmp(match, name, namelen)) + return 1; + } + else { + /* Looking at "Documentation/howto/" and + * the pattern says "Documentation/h*". + */ + if (!memcmp(match, name, slash - match)) + return 1; + } } return 0; } @@ -232,17 +265,17 @@ static int grep_tree(struct grep_opt *op int hit = 0; const char *path; const unsigned char *sha1; - char *down_base; + char *down; char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100); if (tree_name[0]) { int offset = sprintf(path_buf, "%s:", tree_name); - down_base = path_buf + offset; - strcat(down_base, base); + down = path_buf + offset; + strcat(down, base); } else { - down_base = path_buf; - strcpy(down_base, base); + down = path_buf; + strcpy(down, base); } len = strlen(path_buf); @@ -252,7 +285,14 @@ static int grep_tree(struct grep_opt *op pathlen = strlen(path); strcpy(path_buf + len, path); - if (!pathspec_matches(&revs->diffopt, down_base)) + if (S_ISDIR(mode)) + /* Match "abc/" against pathspec to + * decide if we want to descend into "abc" + * directory. + */ + strcpy(path_buf + len + pathlen, "/"); + + if (!pathspec_matches(&revs->diffopt, down)) ; else if (S_ISREG(mode)) hit |= grep_sha1(opt, sha1, path_buf); @@ -264,9 +304,8 @@ static int grep_tree(struct grep_opt *op if (!data) die("unable to read tree (%s)", sha1_to_hex(sha1)); - strcpy(path_buf + len + pathlen, "/"); sub.buf = data; - hit = grep_tree(opt, revs, &sub, tree_name, down_base); + hit |= grep_tree(opt, revs, &sub, tree_name, down); free(data); } update_tree_entry(tree); -- 1.3.1.gd233 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH] builtin-grep: do not use setup_revisions() 2006-05-01 19:30 ` [PATCH] builtin-grep: wildcard pathspec fixes Junio C Hamano @ 2006-05-01 23:24 ` Junio C Hamano 0 siblings, 0 replies; 21+ messages in thread From: Junio C Hamano @ 2006-05-01 23:24 UTC (permalink / raw) To: Linus Torvalds; +Cc: git Grep may want to grok multiple revisions, but it does not make much sense to walk revisions while doing so. This stops calling the code to parse parameters for the revision walker. The parameter parsing for the optional "-e" option becomes a lot simpler with it as well. Signed-off-by: Junio C Hamano <junkio@cox.net> --- Junio C Hamano <junkio@cox.net> writes: > * Still haven't improved the "-e" issue (and to a lesser extent > I think requiring -- is not right in this context either),... So I did this. This removes the ability to say: git grep master..next -e foobar -- Documentation but if you want to walk the revision graph, you are interested in commits that actually changes things to be subject to grep, not just all commits in the range, so if that is what you are doing, you are better off using the pickaxe. builtin-grep.c | 255 +++++++++++++++++++++++++++++--------------------------- 1 files changed, 134 insertions(+), 121 deletions(-) diff --git a/builtin-grep.c b/builtin-grep.c index c3e6701..4be1514 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -8,8 +8,7 @@ #include "blob.h" #include "tree.h" #include "commit.h" #include "tag.h" -#include "diff.h" -#include "revision.h" +#include "tree-walk.h" #include "builtin.h" #include <regex.h> #include <fnmatch.h> @@ -18,15 +17,15 @@ #include <fnmatch.h> * git grep pathspecs are somewhat different from diff-tree pathspecs; * pathname wildcards are allowed. */ -static int pathspec_matches(struct diff_options *opt, const char *name) +static int pathspec_matches(const char **paths, const char *name) { int namelen, i; - if (!opt->nr_paths) + if (!paths || !*paths) return 1; namelen = strlen(name); - for (i = 0; i < opt->nr_paths; i++) { - const char *match = opt->paths[i]; - int matchlen = opt->pathlens[i]; + for (i = 0; paths[i]; i++) { + const char *match = paths[i]; + int matchlen = strlen(match); const char *slash, *cp; if ((matchlen <= namelen) && @@ -241,7 +240,7 @@ static int grep_file(struct grep_opt *op return i; } -static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached) +static int grep_cache(struct grep_opt *opt, const char **paths, int cached) { int hit = 0; int nr; @@ -251,7 +250,7 @@ static int grep_cache(struct grep_opt *o struct cache_entry *ce = active_cache[nr]; if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode))) continue; - if (!pathspec_matches(&revs->diffopt, ce->name)) + if (!pathspec_matches(paths, ce->name)) continue; if (cached) hit |= grep_sha1(opt, ce->sha1, ce->name); @@ -261,7 +260,7 @@ static int grep_cache(struct grep_opt *o return hit; } -static int grep_tree(struct grep_opt *opt, struct rev_info *revs, +static int grep_tree(struct grep_opt *opt, const char **paths, struct tree_desc *tree, const char *tree_name, const char *base) { @@ -297,7 +296,7 @@ static int grep_tree(struct grep_opt *op */ strcpy(path_buf + len + pathlen, "/"); - if (!pathspec_matches(&revs->diffopt, down)) + if (!pathspec_matches(paths, down)) ; else if (S_ISREG(mode)) hit |= grep_sha1(opt, sha1, path_buf); @@ -310,7 +309,7 @@ static int grep_tree(struct grep_opt *op die("unable to read tree (%s)", sha1_to_hex(sha1)); sub.buf = data; - hit |= grep_tree(opt, revs, &sub, tree_name, down); + hit |= grep_tree(opt, paths, &sub, tree_name, down); free(data); } update_tree_entry(tree); @@ -318,7 +317,7 @@ static int grep_tree(struct grep_opt *op return hit; } -static int grep_object(struct grep_opt *opt, struct rev_info *revs, +static int grep_object(struct grep_opt *opt, const char **paths, struct object *obj, const char *name) { if (!strcmp(obj->type, blob_type)) @@ -333,7 +332,7 @@ static int grep_object(struct grep_opt * if (!data) die("unable to read tree (%s)", sha1_to_hex(obj->sha1)); tree.buf = data; - hit = grep_tree(opt, revs, &tree, name, ""); + hit = grep_tree(opt, paths, &tree, name, ""); free(data); return hit; } @@ -345,102 +344,119 @@ static const char builtin_grep_usage[] = int cmd_grep(int argc, const char **argv, char **envp) { - struct rev_info rev; - const char **dst, **src; int err; int hit = 0; - int no_more_arg = 0; - int seen_range = 0; + int no_more_flags = 0; int seen_noncommit = 0; int cached = 0; struct grep_opt opt; - struct object_list *list; + struct object_list *list, **tail, *object_list = NULL; + const char *prefix = setup_git_directory(); + const char **paths = NULL; memset(&opt, 0, sizeof(opt)); opt.regflags = REG_NEWLINE; /* - * Interpret and remove the grep options upfront. Sigh... + * No point using rev_info, really. */ - for (dst = src = &argv[1]; src < argc + argv; ) { - const char *arg = *src++; - if (!no_more_arg) { - if (!strcmp("--", arg)) { - no_more_arg = 1; - *dst++ = arg; - continue; - } - if (!strcmp("--cached", arg)) { - cached = 1; - continue; - } - if (!strcmp("-i", arg) || - !strcmp("--ignore-case", arg)) { - opt.regflags |= REG_ICASE; - continue; - } - if (!strcmp("-v", arg) || - !strcmp("--invert-match", arg)) { - opt.invert = 1; - continue; - } - if (!strcmp("-E", arg) || - !strcmp("--extended-regexp", arg)) { - opt.regflags |= REG_EXTENDED; - continue; - } - if (!strcmp("-G", arg) || - !strcmp("--basic-regexp", arg)) { - opt.regflags &= ~REG_EXTENDED; - continue; - } - if (!strcmp("-e", arg)) { - if (src < argc + argv) { - opt.pattern = *src++; - continue; - } + while (1 < argc) { + const char *arg = argv[1]; + argc--; argv++; + if (!strcmp("--cached", arg)) { + cached = 1; + continue; + } + if (!strcmp("-i", arg) || + !strcmp("--ignore-case", arg)) { + opt.regflags |= REG_ICASE; + continue; + } + if (!strcmp("-v", arg) || + !strcmp("--invert-match", arg)) { + opt.invert = 1; + continue; + } + if (!strcmp("-E", arg) || + !strcmp("--extended-regexp", arg)) { + opt.regflags |= REG_EXTENDED; + continue; + } + if (!strcmp("-G", arg) || + !strcmp("--basic-regexp", arg)) { + opt.regflags &= ~REG_EXTENDED; + continue; + } + if (!strcmp("-n", arg)) { + opt.linenum = 1; + continue; + } + if (!strcmp("-H", arg)) { + /* We always show the pathname, so this + * is a noop. + */ + continue; + } + if (!strcmp("-l", arg) || + !strcmp("--files-with-matches", arg)) { + opt.name_only = 1; + continue; + } + if (!strcmp("-A", arg) || + !strcmp("-B", arg) || + !strcmp("-C", arg)) { + unsigned num; + if (argc <= 1 || + sscanf(*++argv, "%u", &num) != 1) usage(builtin_grep_usage); + argc--; + switch (arg[1]) { + case 'A': + opt.post_context = num; + break; + case 'C': + opt.post_context = num; + case 'B': + opt.pre_context = num; + break; } - if (!strcmp("-n", arg)) { - opt.linenum = 1; - continue; - } - if (!strcmp("-H", arg)) { - /* We always show the pathname, so this - * is a noop. + continue; + } + if (!strcmp("-e", arg)) { + if (1 < argc) { + /* We probably would want to do + * -e pat1 -e pat2 as well later... */ + if (opt.pattern) + die("more than one pattern?"); + opt.pattern = *++argv; + argc--; continue; } - if (!strcmp("-l", arg) || - !strcmp("--files-with-matches", arg)) { - opt.name_only = 1; - continue; - } - if (!strcmp("-A", arg) || - !strcmp("-B", arg) || - !strcmp("-C", arg)) { - unsigned num; - if ((argc + argv <= src) || - sscanf(*src++, "%u", &num) != 1) - usage(builtin_grep_usage); - switch (arg[1]) { - case 'A': - opt.post_context = num; - break; - case 'C': - opt.post_context = num; - case 'B': - opt.pre_context = num; - break; - } - continue; - } + usage(builtin_grep_usage); + } + if (!strcmp("--", arg)) { + no_more_flags = 1; + continue; + } + /* Either unrecognized option or a single pattern */ + if (!no_more_flags && *arg == '-') + usage(builtin_grep_usage); + if (!opt.pattern) { + opt.pattern = arg; + break; + } + else { + /* We are looking at the first path or rev; + * it is found at argv[0] after leaving the + * loop. + */ + argc++; argv--; + break; } - *dst++ = arg; } if (!opt.pattern) die("no pattern given."); - err = regcomp(&opt.regexp, opt.pattern, opt.regflags); if (err) { char errbuf[1024]; @@ -448,11 +464,32 @@ int cmd_grep(int argc, const char **argv regfree(&opt.regexp); die("'%s': %s", opt.pattern, errbuf); } + tail = &object_list; + while (1 < argc) { + struct object *object; + struct object_list *elem; + const char *arg = argv[1]; + unsigned char sha1[20]; + if (get_sha1(arg, sha1) < 0) + break; + object = parse_object(sha1); + if (!object) + die("bad object %s", arg); + elem = object_list_insert(object, tail); + elem->name = arg; + tail = &elem->next; + argc--; argv++; + } + if (1 < argc) + paths = get_pathspec(prefix, argv + 1); + else if (prefix) { + paths = xcalloc(2, sizeof(const char *)); + paths[0] = prefix; + paths[1] = NULL; + } - init_revisions(&rev); - *dst = NULL; - argc = setup_revisions(dst - argv, argv, &rev, NULL); - + if (!object_list) + return !grep_cache(&opt, paths, cached); /* * Do not walk "grep -e foo master next pu -- Documentation/" * but do walk "grep -e foo master..next -- Documentation/". @@ -460,43 +497,19 @@ int cmd_grep(int argc, const char **argv * "grep -e foo v1.0.0:Documentation/ master..next" * so detect that and complain. */ - for (list = rev.pending_objects; list; list = list->next) { + for (list = object_list; list; list = list->next) { struct object *real_obj; - if (list->item->flags & UNINTERESTING) - seen_range = 1; real_obj = deref_tag(list->item, NULL, 0); if (strcmp(real_obj->type, commit_type)) seen_noncommit = 1; } - if (!rev.pending_objects) - return !grep_cache(&opt, &rev, cached); if (cached) die("both --cached and revisions given."); - if (seen_range && seen_noncommit) - die("both A..B and non commit are given."); - if (seen_range) { - struct commit *commit; - prepare_revision_walk(&rev); - while ((commit = get_revision(&rev)) != NULL) { - unsigned char *sha1 = commit->object.sha1; - const char *n = find_unique_abbrev(sha1, rev.abbrev); - char rev_name[41]; - strcpy(rev_name, n); - if (grep_object(&opt, &rev, &commit->object, rev_name)) - hit = 1; - commit->buffer = NULL; - } - return !hit; - } - - /* all of them are non-commit; do not walk, and - * do not lose their names. - */ - for (list = rev.pending_objects; list; list = list->next) { + for (list = object_list; list; list = list->next) { struct object *real_obj; real_obj = deref_tag(list->item, NULL, 0); - if (grep_object(&opt, &rev, real_obj, list->name)) + if (grep_object(&opt, paths, real_obj, list->name)) hit = 1; } return !hit; -- 1.3.1.gd233 ^ permalink raw reply related [flat|nested] 21+ messages in thread
end of thread, other threads:[~2006-05-03 0:01 UTC | newest] Thread overview: 21+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2006-05-01 6:32 [PATCH] built-in "git grep" (git grip) Junio C Hamano 2006-05-01 6:56 ` Jakub Narebski 2006-05-01 6:59 ` Junio C Hamano 2006-05-01 7:12 ` Jakub Narebski 2006-05-02 8:33 ` Andreas Ericsson 2006-05-02 8:44 ` Jakub Narebski 2006-05-02 9:01 ` Junio C Hamano 2006-05-02 9:25 ` Jakub Narebski 2006-05-02 9:39 ` Andreas Ericsson 2006-05-02 19:07 ` Junio C Hamano 2006-05-02 21:23 ` Linus Torvalds 2006-05-02 21:54 ` Junio C Hamano 2006-05-02 23:07 ` Linus Torvalds 2006-05-03 0:01 ` Junio C Hamano 2006-05-01 7:30 ` [PATCH] built-in "git grep" (git grip) - quickfix Junio C Hamano 2006-05-01 14:04 ` [PATCH] built-in "git grep" (git grip) Sam Ravnborg 2006-05-01 14:07 ` Sam Ravnborg 2006-05-01 14:53 ` Sam Ravnborg 2006-05-01 15:48 ` Linus Torvalds 2006-05-01 19:30 ` [PATCH] builtin-grep: wildcard pathspec fixes Junio C Hamano 2006-05-01 23:24 ` [PATCH] builtin-grep: do not use setup_revisions() Junio C Hamano
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).