* [RFC/PATCH] Add --exclude-dir option to git grep
@ 2010-09-24 4:26 David Ripton
2010-09-24 20:33 ` Junio C Hamano
0 siblings, 1 reply; 7+ messages in thread
From: David Ripton @ 2010-09-24 4:26 UTC (permalink / raw)
To: git
It works much like the same option in recent versions of GNU grep.
Any directory name which matches the option will not be searched.
For example, "git grep --exclude-dir Documentation malloc"
Signed-off-by: David Ripton <dripton@ripton.net>
---
builtin/grep.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++------
grep.h | 2 +
2 files changed, 101 insertions(+), 11 deletions(-)
diff --git a/builtin/grep.c b/builtin/grep.c
index da32f3d..b22a0f2 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -333,15 +333,62 @@ static int grep_config(const char *var, const char *value, void *cb)
return 0;
}
+/* Return a sorted string_list of all possible directories within path.
+ *
+ * e.g. if path is "foo/bar/baz", then return a string_list with:
+ * "bar"
+ * "bar/baz"
+ * "baz"
+ * "foo"
+ * "foo/bar"
+ * "foo/bar/baz"
+ */
+static struct string_list subdirs(const char *path)
+{
+ struct string_list list = STRING_LIST_INIT_DUP;
+ /* Make a copy so we can chop off the end. */
+ char *path2 = strdup(path);
+ /* A pointer that advances along path2 */
+ char *path3;
+ int again = 0;
+ do {
+ again = 0;
+ string_list_append(&list, path2);
+ path3 = path2;
+ while ((path3 = strchr(path3, '/')) != NULL) {
+ path3++;
+ string_list_append(&list, path3);
+ }
+ path3 = path2;
+ if ((path3 = strrchr(path3, '/')) != NULL) {
+ *path3 = '\0';
+ again = 1;
+ }
+ } while (again);
+ free(path2);
+ sort_string_list(&list);
+ return list;
+}
+
/*
* Return non-zero if max_depth is negative or path has no more then max_depth
* slashes.
*/
-static int accept_subdir(const char *path, int max_depth)
+static int accept_subdir(const char *path, int max_depth,
+ struct string_list exclude_dir_list)
{
+ struct string_list subdir_list = subdirs(path);
+ int i;
+ for (i = 0; i < subdir_list.nr; i++) {
+ if (string_list_has_string(&exclude_dir_list, subdir_list.items[i].string)) {
+ string_list_clear(&subdir_list, 0);
+ return 0;
+ }
+ }
+ string_list_clear(&subdir_list, 0);
+
if (max_depth < 0)
return 1;
-
while ((path = strchr(path, '/')) != NULL) {
max_depth--;
if (max_depth < 0)
@@ -355,7 +402,8 @@ static int accept_subdir(const char *path, int max_depth)
* Return non-zero if name is a subdirectory of match and is not too deep.
*/
static int is_subdir(const char *name, int namelen,
- const char *match, int matchlen, int max_depth)
+ const char *match, int matchlen, int max_depth,
+ struct string_list exclude_dir_list)
{
if (matchlen > namelen || strncmp(name, match, matchlen))
return 0;
@@ -364,7 +412,8 @@ static int is_subdir(const char *name, int namelen,
return 1;
if (!matchlen || match[matchlen-1] == '/' || name[matchlen] == '/')
- return accept_subdir(name + matchlen + 1, max_depth);
+ return accept_subdir(name + matchlen + 1, max_depth,
+ exclude_dir_list);
return 0;
}
@@ -373,18 +422,21 @@ static int is_subdir(const char *name, int namelen,
* git grep pathspecs are somewhat different from diff-tree pathspecs;
* pathname wildcards are allowed.
*/
-static int pathspec_matches(const char **paths, const char *name, int max_depth)
+static int pathspec_matches(const char **paths, const char *name,
+ int max_depth,
+ struct string_list exclude_dir_list)
{
int namelen, i;
if (!paths || !*paths)
- return accept_subdir(name, max_depth);
+ return accept_subdir(name, max_depth, exclude_dir_list);
namelen = strlen(name);
for (i = 0; paths[i]; i++) {
const char *match = paths[i];
int matchlen = strlen(match);
const char *cp, *meta;
- if (is_subdir(name, namelen, match, matchlen, max_depth))
+ if (is_subdir(name, namelen, match, matchlen, max_depth,
+ exclude_dir_list))
return 1;
if (!fnmatch(match, name, 0))
return 1;
@@ -595,7 +647,8 @@ static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
struct cache_entry *ce = active_cache[nr];
if (!S_ISREG(ce->ce_mode))
continue;
- if (!pathspec_matches(paths, ce->name, opt->max_depth))
+ if (!pathspec_matches(paths, ce->name, opt->max_depth,
+ opt->exclude_dir_list))
continue;
/*
* If CE_VALID is on, we assume worktree file and its cache entry
@@ -656,7 +709,8 @@ static int grep_tree(struct grep_opt *opt, const char **paths,
strbuf_addch(&pathbuf, '/');
down = pathbuf.buf + tn_len;
- if (!pathspec_matches(paths, down, opt->max_depth))
+ if (!pathspec_matches(paths, down, opt->max_depth,
+ opt->exclude_dir_list))
;
else if (S_ISREG(entry.mode))
hit |= grep_sha1(opt, entry.sha1, pathbuf.buf, tn_len);
@@ -722,7 +776,8 @@ static int grep_objects(struct grep_opt *opt, const char **paths,
return hit;
}
-static int grep_directory(struct grep_opt *opt, const char **paths)
+static int grep_directory(struct grep_opt *opt, const char **paths,
+ struct string_list exclude_dir_list)
{
struct dir_struct dir;
int i, hit = 0;
@@ -730,7 +785,12 @@ static int grep_directory(struct grep_opt *opt, const char **paths)
memset(&dir, 0, sizeof(dir));
setup_standard_excludes(&dir);
+ for (i = 0; i < exclude_dir_list.nr; i++)
+ add_exclude(exclude_dir_list.items[i].string, "", 0,
+ dir.exclude_list);
+
fill_directory(&dir, paths);
+
for (i = 0; i < dir.nr; i++) {
hit |= grep_file(opt, dir.entries[i]->name);
if (hit && opt->status_only)
@@ -826,6 +886,25 @@ static int help_callback(const struct option *opt, const char *arg, int unset)
return -1;
}
+static int exclude_dir_callback(const struct option *opt, const char *arg,
+ int unset)
+{
+ struct string_list *exclude_dir_list = opt->value;
+ char *s1 = (char *)arg;
+ /* We do not want leading or trailing slashes. */
+ while (*s1 == '/') {
+ s1++;
+ }
+ char *s2 = strdup(s1);
+ while (*s2 && s2[strlen(s2)-1] == '/') {
+ s2[strlen(s2)-1] = '\0';
+ }
+ string_list_append(exclude_dir_list, s2);
+ free(s2);
+ return 0;
+}
+
+
int cmd_grep(int argc, const char **argv, const char *prefix)
{
int hit = 0;
@@ -837,6 +916,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
struct object_array list = OBJECT_ARRAY_INIT;
const char **paths = NULL;
struct string_list path_list = STRING_LIST_INIT_NODUP;
+ struct string_list exclude_dir_list = STRING_LIST_INIT_DUP;
int i;
int dummy;
int use_index = 1;
@@ -920,6 +1000,10 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
OPT_BOOLEAN(0, "all-match", &opt.all_match,
"show only matches from files that match all patterns"),
OPT_GROUP(""),
+ { OPTION_CALLBACK, 0, "exclude-dir", &exclude_dir_list,
+ "pattern", "exclude <pattern>", PARSE_OPT_NONEG,
+ exclude_dir_callback },
+ OPT_GROUP(""),
{ OPTION_STRING, 'O', "open-files-in-pager", &show_in_pager,
"pager", "show matching files in the pager",
PARSE_OPT_OPTARG, NULL, (intptr_t)default_pager },
@@ -974,6 +1058,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
PARSE_OPT_STOP_AT_NON_OPTION |
PARSE_OPT_NO_INTERNAL_HELP);
+ sort_string_list(&exclude_dir_list);
+ opt.exclude_dir_list = exclude_dir_list;
+
if (use_index && !startup_info->have_repository)
/* die the same way as if we did it at the beginning */
setup_git_directory();
@@ -1093,7 +1180,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
die("--cached cannot be used with --no-index.");
if (list.nr)
die("--no-index cannot be used with revs.");
- hit = grep_directory(&opt, paths);
+ hit = grep_directory(&opt, paths, exclude_dir_list);
} else if (!list.nr) {
if (!cached)
setup_work_tree();
@@ -1110,5 +1197,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
if (hit && show_in_pager)
run_pager(&opt, prefix);
free_grep_patterns(&opt);
+ string_list_clear(&exclude_dir_list, 0);
return !hit;
}
diff --git a/grep.h b/grep.h
index efa8cff..0400611 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,7 @@
#ifndef GREP_H
#define GREP_H
#include "color.h"
+#include "string-list.h"
enum grep_pat_token {
GREP_PATTERN,
@@ -99,6 +100,7 @@ struct grep_opt {
unsigned post_context;
unsigned last_shown;
int show_hunk_mark;
+ struct string_list exclude_dir_list;
void *priv;
void (*output)(struct grep_opt *opt, const void *data, size_t size);
--
David Ripton dripton@ripton.net
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC/PATCH] Add --exclude-dir option to git grep
2010-09-24 4:26 [RFC/PATCH] Add --exclude-dir option to git grep David Ripton
@ 2010-09-24 20:33 ` Junio C Hamano
2010-09-25 2:07 ` David Ripton
2010-09-25 3:35 ` David Ripton
0 siblings, 2 replies; 7+ messages in thread
From: Junio C Hamano @ 2010-09-24 20:33 UTC (permalink / raw)
To: David Ripton; +Cc: git
David Ripton <dripton@ripton.net> writes:
> It works much like the same option in recent versions of GNU grep.
> Any directory name which matches the option will not be searched.
>
> For example, "git grep --exclude-dir Documentation malloc"
>
> Signed-off-by: David Ripton <dripton@ripton.net>
Thanks.
> +/* Return a sorted string_list of all possible directories within path.
> + *
> + * e.g. if path is "foo/bar/baz", then return a string_list with:
> + * "bar"
> + * "bar/baz"
> + * "baz"
> + * "foo"
> + * "foo/bar"
> + * "foo/bar/baz"
> + */
> +static struct string_list subdirs(const char *path)
> +{
> +...
> +}
> +
> /*
> * Return non-zero if max_depth is negative or path has no more then max_depth
> * slashes.
> */
> -static int accept_subdir(const char *path, int max_depth)
> +static int accept_subdir(const char *path, int max_depth,
> + struct string_list exclude_dir_list)
> {
> + struct string_list subdir_list = subdirs(path);
Do you need to run this every time we visit a new directory, expanding
directory components over and over?
It is not like we are jumping around directory hierarchies, visiting
"foo/bar" and then "xyzzy" and then "foo/baz", but rather we visit
directories in a nicer order (i.e. after leaving "foo/bar" but before
jumping to "xyzzy", we would visit "foo/baz"), don't we?
For example, if we are about to visit "foo/bar/baz", that would mean we
were in "foo/bar" and already checked that our exclude list is Ok with
either "foo", "foo/bar" or "bar"; shouldn't we be skipping the test for
these three expansions at least? IOW, when checking against the exclude
list, shouldn't we be testing with "baz", "bar/baz" and "foo/bar/baz" and
nothing else?
> + int i;
> + for (i = 0; i < subdir_list.nr; i++) {
> + if (string_list_has_string(&exclude_dir_list, subdir_list.items[i].string)) {
> + string_list_clear(&subdir_list, 0);
> + return 0;
> + }
> + }
> + string_list_clear(&subdir_list, 0);
> +
> if (max_depth < 0)
> return 1;
Isn't this original check much cheaper than the new test based on many
comparisons and should be at the beginning of the function?
> @@ -826,6 +886,25 @@ static int help_callback(const struct option *opt, const char *arg, int unset)
> return -1;
> }
>
> +static int exclude_dir_callback(const struct option *opt, const char *arg,
> + int unset)
> +{
> + struct string_list *exclude_dir_list = opt->value;
> + char *s1 = (char *)arg;
What is this cast for?
> + /* We do not want leading or trailing slashes. */
> + while (*s1 == '/') {
> + s1++;
> + }
Can the result of this loop become an empty string, and what happens to
the rest of the logic when it happens?
> + char *s2 = strdup(s1);
decl-after-statement. Use xstrdup().
> + while (*s2 && s2[strlen(s2)-1] == '/') {
> + s2[strlen(s2)-1] = '\0';
> + }
Don't scan s2 repeatedly to find its end by calling strlen(s2) on it.
Find its length once, and scan backwards from there yourself.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC/PATCH] Add --exclude-dir option to git grep
2010-09-24 20:33 ` Junio C Hamano
@ 2010-09-25 2:07 ` David Ripton
2010-09-25 5:51 ` Junio C Hamano
2010-09-25 3:35 ` David Ripton
1 sibling, 1 reply; 7+ messages in thread
From: David Ripton @ 2010-09-25 2:07 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On 09/24/10 16:33, Junio C Hamano wrote:
> Thanks.
Thank you for the code review.
> Do you need to run this every time we visit a new directory, expanding
> directory components over and over?
>
> It is not like we are jumping around directory hierarchies, visiting
> "foo/bar" and then "xyzzy" and then "foo/baz", but rather we visit
> directories in a nicer order (i.e. after leaving "foo/bar" but before
> jumping to "xyzzy", we would visit "foo/baz"), don't we?
I agree that there's room for optimization here.
>> if (max_depth< 0)
>> return 1;
>
> Isn't this original check much cheaper than the new test based on many
> comparisons and should be at the beginning of the function?
Yes.
>> @@ -826,6 +886,25 @@ static int help_callback(const struct option *opt, const char *arg, int unset)
>> return -1;
>> }
>>
>> +static int exclude_dir_callback(const struct option *opt, const char *arg,
>> + int unset)
>> +{
>> + struct string_list *exclude_dir_list = opt->value;
>> + char *s1 = (char *)arg;
>
> What is this cast for?
It avoids:
"builtin/grep.c:893: warning: initialization discards qualifiers from
pointer target type"
>> + /* We do not want leading or trailing slashes. */
>> + while (*s1 == '/') {
>> + s1++;
>> + }
>
> Can the result of this loop become an empty string, and what happens to
> the rest of the logic when it happens?
If the string is just forward slashes, then it will become an empty
string, which will strdup successfully, and then that particular
--exclude-dir will have no effect. Just tested that case and did not
find a bug.
>> + char *s2 = strdup(s1);
>
> decl-after-statement.
Oops.
Sadly, "gcc -Wall -std=c89" does not warn for this. ("-pedantic" does.)
> Use xstrdup().
Okay.
>> + while (*s2&& s2[strlen(s2)-1] == '/') {
>> + s2[strlen(s2)-1] = '\0';
>> + }
>
> Don't scan s2 repeatedly to find its end by calling strlen(s2) on it.
> Find its length once, and scan backwards from there yourself.
Okay. I'll try to send out a revised version of this patch soon.
--
David Ripton dripton@ripton.net
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC/PATCH] Add --exclude-dir option to git grep
2010-09-24 20:33 ` Junio C Hamano
2010-09-25 2:07 ` David Ripton
@ 2010-09-25 3:35 ` David Ripton
2010-09-27 4:53 ` Junio C Hamano
1 sibling, 1 reply; 7+ messages in thread
From: David Ripton @ 2010-09-25 3:35 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
It works much like the same option in recent versions of GNU grep.
Any directory name which matches the option will not be searched.
For example, "git grep --exclude-dir Documentation malloc"
Signed-off-by: David Ripton <dripton@ripton.net>
---
Version 2 of this patch, following Junio's comments:
strdup() -> xstrdup()
Fixed a declaration after code.
Removed basenames from subdirs()
Do not call subdirs() at all if exclude_dir_list is empty.
Unfortunately the other suggested optimization, moving the original test for
max_depth < 0 in accept_subdir to the top, turned out to be unsafe. And
simplifying subdirs() to only deal with the last subdirectory rather than the
whole path makes it difficult to exclude a multi-part directory like
"Documentation/technical". But now we totally skip the subdirs() call when
exclude_dir_list is empty, so at least the cost is only born by those
actually using this option.
builtin/grep.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++------
grep.h | 2 +
2 files changed, 117 insertions(+), 15 deletions(-)
diff --git a/builtin/grep.c b/builtin/grep.c
index da32f3d..220a7db 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -333,15 +333,68 @@ static int grep_config(const char *var, const char *value, void *cb)
return 0;
}
+/* Return a sorted string_list of all possible directories within path.
+ *
+ * e.g. if path is "foo/bar/baz", then return a string_list with:
+ * "bar"
+ * "foo"
+ * "foo/bar"
+ *
+ * (We do not need to return baz because the paths we receive always
+ * end with a file not a directory.)
+ */
+static struct string_list subdirs(const char *path)
+{
+ struct string_list list = STRING_LIST_INIT_DUP;
+ /* Make a copy so we can chop off the end. */
+ char *path2 = xstrdup(path);
+ /* A pointer that advances along path2 */
+ char *path3 = path2;
+ /* Chop off the basename portion. */
+ if ((path3 = strrchr(path3, '/')) != NULL)
+ *path3 = '\0';
+ int again = 0;
+ do {
+ again = 0;
+ string_list_append(&list, path2);
+ path3 = path2;
+ while ((path3 = strchr(path3, '/')) != NULL) {
+ path3++;
+ string_list_append(&list, path3);
+ }
+ path3 = path2;
+ if ((path3 = strrchr(path3, '/')) != NULL) {
+ *path3 = '\0';
+ again = 1;
+ }
+ } while (again);
+ free(path2);
+ sort_string_list(&list);
+ return list;
+}
+
/*
* Return non-zero if max_depth is negative or path has no more then max_depth
* slashes.
*/
-static int accept_subdir(const char *path, int max_depth)
+static int accept_subdir(const char *path, int max_depth,
+ const struct string_list exclude_dir_list)
{
+ if (exclude_dir_list.nr > 0) {
+ struct string_list subdir_list = subdirs(path);
+ int i;
+ for (i = 0; i < subdir_list.nr; i++) {
+ if (string_list_has_string(&exclude_dir_list,
+ subdir_list.items[i].string)) {
+ string_list_clear(&subdir_list, 0);
+ return 0;
+ }
+ }
+ string_list_clear(&subdir_list, 0);
+ }
+
if (max_depth < 0)
return 1;
-
while ((path = strchr(path, '/')) != NULL) {
max_depth--;
if (max_depth < 0)
@@ -355,7 +408,8 @@ static int accept_subdir(const char *path, int max_depth)
* Return non-zero if name is a subdirectory of match and is not too deep.
*/
static int is_subdir(const char *name, int namelen,
- const char *match, int matchlen, int max_depth)
+ const char *match, int matchlen, int max_depth,
+ const struct string_list exclude_dir_list)
{
if (matchlen > namelen || strncmp(name, match, matchlen))
return 0;
@@ -364,7 +418,8 @@ static int is_subdir(const char *name, int namelen,
return 1;
if (!matchlen || match[matchlen-1] == '/' || name[matchlen] == '/')
- return accept_subdir(name + matchlen + 1, max_depth);
+ return accept_subdir(name + matchlen + 1, max_depth,
+ exclude_dir_list);
return 0;
}
@@ -373,18 +428,21 @@ static int is_subdir(const char *name, int namelen,
* git grep pathspecs are somewhat different from diff-tree pathspecs;
* pathname wildcards are allowed.
*/
-static int pathspec_matches(const char **paths, const char *name, int max_depth)
+static int pathspec_matches(const char **paths, const char *name,
+ int max_depth,
+ const struct string_list exclude_dir_list)
{
int namelen, i;
if (!paths || !*paths)
- return accept_subdir(name, max_depth);
+ return accept_subdir(name, max_depth, exclude_dir_list);
namelen = strlen(name);
for (i = 0; paths[i]; i++) {
const char *match = paths[i];
int matchlen = strlen(match);
const char *cp, *meta;
- if (is_subdir(name, namelen, match, matchlen, max_depth))
+ if (is_subdir(name, namelen, match, matchlen, max_depth,
+ exclude_dir_list))
return 1;
if (!fnmatch(match, name, 0))
return 1;
@@ -595,14 +653,17 @@ static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
struct cache_entry *ce = active_cache[nr];
if (!S_ISREG(ce->ce_mode))
continue;
- if (!pathspec_matches(paths, ce->name, opt->max_depth))
+ if (!pathspec_matches(paths, ce->name, opt->max_depth,
+ opt->exclude_dir_list))
continue;
+
/*
- * If CE_VALID is on, we assume worktree file and its cache entry
- * are identical, even if worktree file has been modified, so use
- * cache version instead
+ * If CE_VALID is on, we assume worktree file and its cache
+ * entry are identical, even if worktree file has been
+ * modified, so use cache version instead
*/
- if (cached || (ce->ce_flags & CE_VALID) || ce_skip_worktree(ce)) {
+ if (cached || (ce->ce_flags & CE_VALID) ||
+ ce_skip_worktree(ce)) {
if (ce_stage(ce))
continue;
hit |= grep_sha1(opt, ce->sha1, ce->name, 0);
@@ -656,7 +717,8 @@ static int grep_tree(struct grep_opt *opt, const char **paths,
strbuf_addch(&pathbuf, '/');
down = pathbuf.buf + tn_len;
- if (!pathspec_matches(paths, down, opt->max_depth))
+ if (!pathspec_matches(paths, down, opt->max_depth,
+ opt->exclude_dir_list))
;
else if (S_ISREG(entry.mode))
hit |= grep_sha1(opt, entry.sha1, pathbuf.buf, tn_len);
@@ -722,7 +784,8 @@ static int grep_objects(struct grep_opt *opt, const char **paths,
return hit;
}
-static int grep_directory(struct grep_opt *opt, const char **paths)
+static int grep_directory(struct grep_opt *opt, const char **paths,
+ const struct string_list exclude_dir_list)
{
struct dir_struct dir;
int i, hit = 0;
@@ -730,7 +793,12 @@ static int grep_directory(struct grep_opt *opt, const char **paths)
memset(&dir, 0, sizeof(dir));
setup_standard_excludes(&dir);
+ for (i = 0; i < exclude_dir_list.nr; i++)
+ add_exclude(exclude_dir_list.items[i].string, "", 0,
+ dir.exclude_list);
+
fill_directory(&dir, paths);
+
for (i = 0; i < dir.nr; i++) {
hit |= grep_file(opt, dir.entries[i]->name);
if (hit && opt->status_only)
@@ -826,6 +894,29 @@ static int help_callback(const struct option *opt, const char *arg, int unset)
return -1;
}
+static int exclude_dir_callback(const struct option *opt, const char *arg,
+ int unset)
+{
+ struct string_list *exclude_dir_list = opt->value;
+ char *s1 = (char *)arg;
+ char *s2;
+ char *s3;
+ /* We do not want leading or trailing slashes. */
+ while (*s1 == '/') {
+ s1++;
+ }
+ s2 = xstrdup(s1);
+ s3 = s2 + strlen(s2) - 1;
+ while (s3 >= s2 && *s3 == '/') {
+ *s3 = '\0';
+ s3--;
+ }
+ string_list_append(exclude_dir_list, s2);
+ free(s2);
+ return 0;
+}
+
+
int cmd_grep(int argc, const char **argv, const char *prefix)
{
int hit = 0;
@@ -837,6 +928,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
struct object_array list = OBJECT_ARRAY_INIT;
const char **paths = NULL;
struct string_list path_list = STRING_LIST_INIT_NODUP;
+ struct string_list exclude_dir_list = STRING_LIST_INIT_DUP;
int i;
int dummy;
int use_index = 1;
@@ -920,6 +1012,10 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
OPT_BOOLEAN(0, "all-match", &opt.all_match,
"show only matches from files that match all patterns"),
OPT_GROUP(""),
+ { OPTION_CALLBACK, 0, "exclude-dir", &exclude_dir_list,
+ "pattern", "exclude <pattern>", PARSE_OPT_NONEG,
+ exclude_dir_callback },
+ OPT_GROUP(""),
{ OPTION_STRING, 'O', "open-files-in-pager", &show_in_pager,
"pager", "show matching files in the pager",
PARSE_OPT_OPTARG, NULL, (intptr_t)default_pager },
@@ -974,6 +1070,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
PARSE_OPT_STOP_AT_NON_OPTION |
PARSE_OPT_NO_INTERNAL_HELP);
+ sort_string_list(&exclude_dir_list);
+ opt.exclude_dir_list = exclude_dir_list;
+
if (use_index && !startup_info->have_repository)
/* die the same way as if we did it at the beginning */
setup_git_directory();
@@ -1093,7 +1192,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
die("--cached cannot be used with --no-index.");
if (list.nr)
die("--no-index cannot be used with revs.");
- hit = grep_directory(&opt, paths);
+ hit = grep_directory(&opt, paths, exclude_dir_list);
} else if (!list.nr) {
if (!cached)
setup_work_tree();
@@ -1110,5 +1209,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
if (hit && show_in_pager)
run_pager(&opt, prefix);
free_grep_patterns(&opt);
+ string_list_clear(&exclude_dir_list, 0);
return !hit;
}
diff --git a/grep.h b/grep.h
index efa8cff..0400611 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,7 @@
#ifndef GREP_H
#define GREP_H
#include "color.h"
+#include "string-list.h"
enum grep_pat_token {
GREP_PATTERN,
@@ -99,6 +100,7 @@ struct grep_opt {
unsigned post_context;
unsigned last_shown;
int show_hunk_mark;
+ struct string_list exclude_dir_list;
void *priv;
void (*output)(struct grep_opt *opt, const void *data, size_t size);
--
David Ripton dripton@ripton.net
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC/PATCH] Add --exclude-dir option to git grep
2010-09-25 2:07 ` David Ripton
@ 2010-09-25 5:51 ` Junio C Hamano
2010-09-25 13:14 ` David Ripton
0 siblings, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2010-09-25 5:51 UTC (permalink / raw)
To: David Ripton; +Cc: git
David Ripton <dripton@ripton.net> writes:
>>> +static int exclude_dir_callback(const struct option *opt, const char *arg,
>>> + int unset)
>>> +{
>>> + struct string_list *exclude_dir_list = opt->value;
>>> + char *s1 = (char *)arg;
>>
>> What is this cast for?
>
> It avoids:
>
> "builtin/grep.c:893: warning: initialization discards qualifiers from
> pointer target type"
And the reason why s1 cannot be of type "const char *" is...?
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC/PATCH] Add --exclude-dir option to git grep
2010-09-25 5:51 ` Junio C Hamano
@ 2010-09-25 13:14 ` David Ripton
0 siblings, 0 replies; 7+ messages in thread
From: David Ripton @ 2010-09-25 13:14 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
It works much like the same option in recent versions of GNU grep.
Any directory name which matches the option will not be searched.
For example, "git grep --exclude-dir Documentation malloc"
Signed-off-by: David Ripton <dripton@ripton.net>
---
This version makes "s1" const, rather than casting away const.
builtin/grep.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++------
grep.h | 2 +
2 files changed, 117 insertions(+), 15 deletions(-)
diff --git a/builtin/grep.c b/builtin/grep.c
index da32f3d..699b308 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -333,15 +333,68 @@ static int grep_config(const char *var, const char *value, void *cb)
return 0;
}
+/* Return a sorted string_list of all possible directories within path.
+ *
+ * e.g. if path is "foo/bar/baz", then return a string_list with:
+ * "bar"
+ * "foo"
+ * "foo/bar"
+ *
+ * (We do not need to return baz because the paths we receive always
+ * end with a file not a directory.)
+ */
+static struct string_list subdirs(const char *path)
+{
+ struct string_list list = STRING_LIST_INIT_DUP;
+ /* Make a copy so we can chop off the end. */
+ char *path2 = xstrdup(path);
+ /* A pointer that advances along path2 */
+ char *path3 = path2;
+ /* Chop off the basename portion. */
+ if ((path3 = strrchr(path3, '/')) != NULL)
+ *path3 = '\0';
+ int again = 0;
+ do {
+ again = 0;
+ string_list_append(&list, path2);
+ path3 = path2;
+ while ((path3 = strchr(path3, '/')) != NULL) {
+ path3++;
+ string_list_append(&list, path3);
+ }
+ path3 = path2;
+ if ((path3 = strrchr(path3, '/')) != NULL) {
+ *path3 = '\0';
+ again = 1;
+ }
+ } while (again);
+ free(path2);
+ sort_string_list(&list);
+ return list;
+}
+
/*
* Return non-zero if max_depth is negative or path has no more then max_depth
* slashes.
*/
-static int accept_subdir(const char *path, int max_depth)
+static int accept_subdir(const char *path, int max_depth,
+ const struct string_list exclude_dir_list)
{
+ if (exclude_dir_list.nr > 0) {
+ struct string_list subdir_list = subdirs(path);
+ int i;
+ for (i = 0; i < subdir_list.nr; i++) {
+ if (string_list_has_string(&exclude_dir_list,
+ subdir_list.items[i].string)) {
+ string_list_clear(&subdir_list, 0);
+ return 0;
+ }
+ }
+ string_list_clear(&subdir_list, 0);
+ }
+
if (max_depth < 0)
return 1;
-
while ((path = strchr(path, '/')) != NULL) {
max_depth--;
if (max_depth < 0)
@@ -355,7 +408,8 @@ static int accept_subdir(const char *path, int max_depth)
* Return non-zero if name is a subdirectory of match and is not too deep.
*/
static int is_subdir(const char *name, int namelen,
- const char *match, int matchlen, int max_depth)
+ const char *match, int matchlen, int max_depth,
+ const struct string_list exclude_dir_list)
{
if (matchlen > namelen || strncmp(name, match, matchlen))
return 0;
@@ -364,7 +418,8 @@ static int is_subdir(const char *name, int namelen,
return 1;
if (!matchlen || match[matchlen-1] == '/' || name[matchlen] == '/')
- return accept_subdir(name + matchlen + 1, max_depth);
+ return accept_subdir(name + matchlen + 1, max_depth,
+ exclude_dir_list);
return 0;
}
@@ -373,18 +428,21 @@ static int is_subdir(const char *name, int namelen,
* git grep pathspecs are somewhat different from diff-tree pathspecs;
* pathname wildcards are allowed.
*/
-static int pathspec_matches(const char **paths, const char *name, int max_depth)
+static int pathspec_matches(const char **paths, const char *name,
+ int max_depth,
+ const struct string_list exclude_dir_list)
{
int namelen, i;
if (!paths || !*paths)
- return accept_subdir(name, max_depth);
+ return accept_subdir(name, max_depth, exclude_dir_list);
namelen = strlen(name);
for (i = 0; paths[i]; i++) {
const char *match = paths[i];
int matchlen = strlen(match);
const char *cp, *meta;
- if (is_subdir(name, namelen, match, matchlen, max_depth))
+ if (is_subdir(name, namelen, match, matchlen, max_depth,
+ exclude_dir_list))
return 1;
if (!fnmatch(match, name, 0))
return 1;
@@ -595,14 +653,17 @@ static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
struct cache_entry *ce = active_cache[nr];
if (!S_ISREG(ce->ce_mode))
continue;
- if (!pathspec_matches(paths, ce->name, opt->max_depth))
+ if (!pathspec_matches(paths, ce->name, opt->max_depth,
+ opt->exclude_dir_list))
continue;
+
/*
- * If CE_VALID is on, we assume worktree file and its cache entry
- * are identical, even if worktree file has been modified, so use
- * cache version instead
+ * If CE_VALID is on, we assume worktree file and its cache
+ * entry are identical, even if worktree file has been
+ * modified, so use cache version instead
*/
- if (cached || (ce->ce_flags & CE_VALID) || ce_skip_worktree(ce)) {
+ if (cached || (ce->ce_flags & CE_VALID) ||
+ ce_skip_worktree(ce)) {
if (ce_stage(ce))
continue;
hit |= grep_sha1(opt, ce->sha1, ce->name, 0);
@@ -656,7 +717,8 @@ static int grep_tree(struct grep_opt *opt, const char **paths,
strbuf_addch(&pathbuf, '/');
down = pathbuf.buf + tn_len;
- if (!pathspec_matches(paths, down, opt->max_depth))
+ if (!pathspec_matches(paths, down, opt->max_depth,
+ opt->exclude_dir_list))
;
else if (S_ISREG(entry.mode))
hit |= grep_sha1(opt, entry.sha1, pathbuf.buf, tn_len);
@@ -722,7 +784,8 @@ static int grep_objects(struct grep_opt *opt, const char **paths,
return hit;
}
-static int grep_directory(struct grep_opt *opt, const char **paths)
+static int grep_directory(struct grep_opt *opt, const char **paths,
+ const struct string_list exclude_dir_list)
{
struct dir_struct dir;
int i, hit = 0;
@@ -730,7 +793,12 @@ static int grep_directory(struct grep_opt *opt, const char **paths)
memset(&dir, 0, sizeof(dir));
setup_standard_excludes(&dir);
+ for (i = 0; i < exclude_dir_list.nr; i++)
+ add_exclude(exclude_dir_list.items[i].string, "", 0,
+ dir.exclude_list);
+
fill_directory(&dir, paths);
+
for (i = 0; i < dir.nr; i++) {
hit |= grep_file(opt, dir.entries[i]->name);
if (hit && opt->status_only)
@@ -826,6 +894,29 @@ static int help_callback(const struct option *opt, const char *arg, int unset)
return -1;
}
+static int exclude_dir_callback(const struct option *opt, const char *arg,
+ int unset)
+{
+ struct string_list *exclude_dir_list = opt->value;
+ const char *s1 = arg;
+ char *s2;
+ char *s3;
+ /* We do not want leading or trailing slashes. */
+ while (*s1 == '/') {
+ s1++;
+ }
+ s2 = xstrdup(s1);
+ s3 = s2 + strlen(s2) - 1;
+ while (s3 >= s2 && *s3 == '/') {
+ *s3 = '\0';
+ s3--;
+ }
+ string_list_append(exclude_dir_list, s2);
+ free(s2);
+ return 0;
+}
+
+
int cmd_grep(int argc, const char **argv, const char *prefix)
{
int hit = 0;
@@ -837,6 +928,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
struct object_array list = OBJECT_ARRAY_INIT;
const char **paths = NULL;
struct string_list path_list = STRING_LIST_INIT_NODUP;
+ struct string_list exclude_dir_list = STRING_LIST_INIT_DUP;
int i;
int dummy;
int use_index = 1;
@@ -920,6 +1012,10 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
OPT_BOOLEAN(0, "all-match", &opt.all_match,
"show only matches from files that match all patterns"),
OPT_GROUP(""),
+ { OPTION_CALLBACK, 0, "exclude-dir", &exclude_dir_list,
+ "pattern", "exclude <pattern>", PARSE_OPT_NONEG,
+ exclude_dir_callback },
+ OPT_GROUP(""),
{ OPTION_STRING, 'O', "open-files-in-pager", &show_in_pager,
"pager", "show matching files in the pager",
PARSE_OPT_OPTARG, NULL, (intptr_t)default_pager },
@@ -974,6 +1070,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
PARSE_OPT_STOP_AT_NON_OPTION |
PARSE_OPT_NO_INTERNAL_HELP);
+ sort_string_list(&exclude_dir_list);
+ opt.exclude_dir_list = exclude_dir_list;
+
if (use_index && !startup_info->have_repository)
/* die the same way as if we did it at the beginning */
setup_git_directory();
@@ -1093,7 +1192,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
die("--cached cannot be used with --no-index.");
if (list.nr)
die("--no-index cannot be used with revs.");
- hit = grep_directory(&opt, paths);
+ hit = grep_directory(&opt, paths, exclude_dir_list);
} else if (!list.nr) {
if (!cached)
setup_work_tree();
@@ -1110,5 +1209,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
if (hit && show_in_pager)
run_pager(&opt, prefix);
free_grep_patterns(&opt);
+ string_list_clear(&exclude_dir_list, 0);
return !hit;
}
diff --git a/grep.h b/grep.h
index efa8cff..0400611 100644
--- a/grep.h
+++ b/grep.h
@@ -1,6 +1,7 @@
#ifndef GREP_H
#define GREP_H
#include "color.h"
+#include "string-list.h"
enum grep_pat_token {
GREP_PATTERN,
@@ -99,6 +100,7 @@ struct grep_opt {
unsigned post_context;
unsigned last_shown;
int show_hunk_mark;
+ struct string_list exclude_dir_list;
void *priv;
void (*output)(struct grep_opt *opt, const void *data, size_t size);
--
David Ripton dripton@ripton.net
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC/PATCH] Add --exclude-dir option to git grep
2010-09-25 3:35 ` David Ripton
@ 2010-09-27 4:53 ` Junio C Hamano
0 siblings, 0 replies; 7+ messages in thread
From: Junio C Hamano @ 2010-09-27 4:53 UTC (permalink / raw)
To: David Ripton; +Cc: git
David Ripton <dripton@ripton.net> writes:
> It works much like the same option in recent versions of GNU grep.
> Any directory name which matches the option will not be searched.
>
> For example, "git grep --exclude-dir Documentation malloc"
>
> Signed-off-by: David Ripton <dripton@ripton.net>
> ---
> Version 2 of this patch, following Junio's comments:
> strdup() -> xstrdup()
> Fixed a declaration after code.
> Removed basenames from subdirs()
> Do not call subdirs() at all if exclude_dir_list is empty.
>
> Unfortunately the other suggested optimization, moving the original test for
> max_depth < 0 in accept_subdir to the top, turned out to be unsafe. And
> simplifying subdirs() to only deal with the last subdirectory rather than the
> whole path makes it difficult to exclude a multi-part directory like
> "Documentation/technical".
Doesn't the caller have the full path, relative to the root of the working
tree, at that point? That is what the "name" parameter given to
pathspec_matches() is, and is given to accept_subdir().
If you are in "x/y/doc" and about to visit "tech", "x/y/doc/tech" is given
to you in "name" to see if it is worth going into that hierarchy.
Why isn't it enough to check that "tech", "doc/tech", "y/doc/tech", nor
"x/y/doc/tech" appear in the list of excluded patterns? At that point,
you know none of "x", "x/y" nor "x/y/doc" appear in the exclude list;
otherwise you wouldn't be in "x/y/doc" in the first place, no?
And the beauty of not having to check anything but directory components at
the tail end is that you do not have to reallocate the strings nor stuff
them in a list at all.
You might need to restructure the loop that walks the index which is a
flat list needs to be restructured to match hierarchical tree walking code
that lets you skip the entries with the same prefix in one go, though.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2010-09-27 4:55 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-09-24 4:26 [RFC/PATCH] Add --exclude-dir option to git grep David Ripton
2010-09-24 20:33 ` Junio C Hamano
2010-09-25 2:07 ` David Ripton
2010-09-25 5:51 ` Junio C Hamano
2010-09-25 13:14 ` David Ripton
2010-09-25 3:35 ` David Ripton
2010-09-27 4:53 ` Junio C Hamano
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).