* [PATCH 3/3] revision traversal: --author, --committer, and --grep.
@ 2006-09-18 0:42 Junio C Hamano
2006-09-18 6:05 ` Jeff King
2006-09-18 6:52 ` [PATCH] rev-list: fix segfault with --{author,committer,grep} Jeff King
0 siblings, 2 replies; 5+ messages in thread
From: Junio C Hamano @ 2006-09-18 0:42 UTC (permalink / raw)
To: Linus Torvalds; +Cc: git, Kai Blin, Jeff King
This adds three options to setup_revisions(), which lets you
filter resulting commits by the author name, the committer name
and the log message with regexp.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
Documentation/git-rev-list.txt | 11 ++++++
revision.c | 76 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 86 insertions(+), 1 deletions(-)
diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt
index 28966ad..00a95e2 100644
--- a/Documentation/git-rev-list.txt
+++ b/Documentation/git-rev-list.txt
@@ -20,6 +20,7 @@ SYNOPSIS
[ \--stdin ]
[ \--topo-order ]
[ \--parents ]
+ [ \--(author|committer|grep)=<pattern> ]
[ [\--objects | \--objects-edge] [ \--unpacked ] ]
[ \--pretty | \--header ]
[ \--bisect ]
@@ -154,6 +155,16 @@ limiting may be applied.
Limit the commits output to specified time range.
+--author='pattern', --committer='pattern'::
+
+ Limit the commits output to ones with author/committer
+ header lines that match the specified pattern.
+
+--grep='pattern'::
+
+ Limit the commits output to ones with log message that
+ matches the specified pattern.
+
--remove-empty::
Stop when a given path disappears from the tree.
diff --git a/revision.c b/revision.c
index 8fda20d..19f7272 100644
--- a/revision.c
+++ b/revision.c
@@ -673,6 +673,40 @@ int handle_revision_arg(const char *arg,
return 0;
}
+static void add_header_grep(struct rev_info *revs, const char *field, const char *pattern)
+{
+ char *pat;
+ int patlen, fldlen;
+
+ if (!revs->header_filter) {
+ struct grep_opt *opt = xcalloc(1, sizeof(*opt));
+ opt->status_only = 1;
+ opt->pattern_tail = &(opt->pattern_list);
+ opt->regflags = REG_NEWLINE;
+ revs->header_filter = opt;
+ }
+
+ fldlen = strlen(field);
+ patlen = strlen(pattern);
+ pat = xmalloc(patlen + fldlen + 3);
+ sprintf(pat, "^%s %s", field, pattern);
+ append_grep_pattern(revs->header_filter, pat,
+ "command line", 0, GREP_PATTERN);
+}
+
+static void add_message_grep(struct rev_info *revs, const char *pattern)
+{
+ if (!revs->message_filter) {
+ struct grep_opt *opt = xcalloc(1, sizeof(*opt));
+ opt->status_only = 1;
+ opt->pattern_tail = &(opt->pattern_list);
+ opt->regflags = REG_NEWLINE;
+ revs->message_filter = opt;
+ }
+ append_grep_pattern(revs->message_filter, pattern,
+ "command line", 0, GREP_PATTERN);
+}
+
/*
* Parse revision information, filling in the "rev_info" structure,
* and removing the used arguments from the argument list.
@@ -896,6 +930,18 @@ int setup_revisions(int argc, const char
revs->relative_date = 1;
continue;
}
+ if (!strncmp(arg, "--author=", 9)) {
+ add_header_grep(revs, "author", arg+9);
+ continue;
+ }
+ if (!strncmp(arg, "--committer=", 12)) {
+ add_header_grep(revs, "committer", arg+12);
+ continue;
+ }
+ if (!strncmp(arg, "--grep=", 7)) {
+ add_message_grep(revs, arg+7);
+ continue;
+ }
opts = diff_opt_parse(&revs->diffopt, argv+i, argc-i);
if (opts > 0) {
revs->diff = 1;
@@ -956,6 +1002,11 @@ int setup_revisions(int argc, const char
if (diff_setup_done(&revs->diffopt) < 0)
die("diff_setup_done failed");
+ if (revs->header_filter)
+ compile_grep_patterns(revs->header_filter);
+ if (revs->message_filter)
+ compile_grep_patterns(revs->message_filter);
+
return left;
}
@@ -1030,10 +1081,33 @@ static void mark_boundary_to_show(struct
static int commit_match(struct commit *commit, struct rev_info *opt)
{
+ char *header, *message;
+ unsigned long header_len, message_len;
+
if (!opt->header_filter && !opt->message_filter)
return 1;
- /* match it here */
+ header = commit->buffer;
+ message = strstr(header, "\n\n");
+ if (message) {
+ message += 2;
+ header_len = message - header - 1;
+ message_len = strlen(message);
+ }
+ else {
+ header_len = strlen(header);
+ message = header;
+ message_len = 0;
+ }
+
+ if (opt->header_filter &&
+ !grep_buffer(opt->header_filter, NULL, header, header_len))
+ return 0;
+
+ if (opt->message_filter &&
+ !grep_buffer(opt->message_filter, NULL, message, message_len))
+ return 0;
+
return 1;
}
--
1.4.2.1.g414e5
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 3/3] revision traversal: --author, --committer, and --grep.
2006-09-18 0:42 [PATCH 3/3] revision traversal: --author, --committer, and --grep Junio C Hamano
@ 2006-09-18 6:05 ` Jeff King
2006-09-18 6:51 ` Junio C Hamano
2006-09-18 6:52 ` [PATCH] rev-list: fix segfault with --{author,committer,grep} Jeff King
1 sibling, 1 reply; 5+ messages in thread
From: Jeff King @ 2006-09-18 6:05 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Linus Torvalds, git, Kai Blin
On Sun, Sep 17, 2006 at 05:42:26PM -0700, Junio C Hamano wrote:
> This adds three options to setup_revisions(), which lets you
> filter resulting commits by the author name, the committer name
> and the log message with regexp.
First of all, thanks for implementing this; I tried to use it the other
day (remembering the discussion and patches a few weeks ago) and was
disappointed to find it absent.
That being said, I find the matching style completely unintuitive. :)
To find --author=foo, your strategy is to stringify the header and grep
for "^author foo". As a user, my expectation was that you would
stringify the author field and grep for "foo".
The important difference is that your approach means that the user's
regex is implicitly anchored at the beginning of the field. Thus,
searching by email address does not work with --author=junkio, but
rather requires --author='.*junkio'.
Possible fixes:
1. Match against "^<field>.*<regex>" (I haven't looked closely at the
builtin grep implementation, but presumably '.' as usual does not
include newline).
2. Find <field>, and then feed grep_buffer only the contents of that
line.
The second is what I feel that users will expect (at least what I
expected!), but is probably slightly less efficient (two greps instead
of one, but I doubt the difference would be significant). However, I
don't think there is a way with the first approach to explicitly request
a beginning-of-string anchor (i.e., "^Junio" in the second approach).
A few other thoughts:
1. Case sensitivity? For convenience sake, it seems reasonable to
match these fields without case sensitivity (what was the
capitalization of A Large Angry SCM again? von Brand or Von Brand?
etc). Should it be optional, and if so, how to specify it (a global
command line option is probably not desired, as you might want
case-sensitive --grep but case-insensitive --author). So we either
need a "-i means the rest of the arguments are insensitive, +i
means they are sensitive" option, or some syntax to specify it in
the regex (perl uses (?i)).
2. Is there any use to exposing the "header_grep" functionality with
--grep-header? Is there anything worth grepping for besides
author/committer? The general consensus on non-core headers in
commit objects seemed to be "don't do it".
3. An alias (--who=foo?) for --author=foo --committer=foo. I believe
this doesn't require boolean magic, since we default to OR.
I'm happy to work on implementing any of the above if there's interest.
-Peff
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 3/3] revision traversal: --author, --committer, and --grep.
2006-09-18 6:05 ` Jeff King
@ 2006-09-18 6:51 ` Junio C Hamano
2006-09-18 17:07 ` Linus Torvalds
0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2006-09-18 6:51 UTC (permalink / raw)
To: Jeff King; +Cc: Linus Torvalds, git, Kai Blin
Jeff King <peff@peff.net> writes:
>The important difference is that your approach means that the user's
>regex is implicitly anchored at the beginning of the field. Thus,
>searching by email address does not work with --author=junkio, but
>rather requires --author='.*junkio'.
I wanted to default it to left anchored, so this was somewhat
deliberate, but this is probably subject to taste.
I actually wanted to do only --grep-header and --grep-message
internally with --author and --committer as simple shorthands,
and that is why --author=<pattern> is just an internal synonym
for "author <pattern>". I even considered doing without
distinction between header and body (meaning, a line in the
commit log message that happens to begin with "author " would
match --author string), which is technically incorrect but would
be more efficient. I do not think occasional false hits would
matter much in practice.
Not making the match case insensitive was probably a mistake.
Looking for "--author=linus" would be easier to type.
One thing that I know is broken is the match at the tail; I am
actually thinking of doing something like this:
sprintf(pat, "^%s %s [0-9][0-9]* [+-][0-9][0-9][0-9][0-9]$",
field, pattern);
to avoid matching the timestamps.
> A few other thoughts:
> 1. Case sensitivity?
I do not think much is lost if we make the match always case
insensitive for this application. It might be reasonable to to
default to case insensitive match but if there is a pattern that
has an uppercase use case sensitive match.
> 2. Is there any use to exposing the "header_grep" functionality with
> --grep-header?
I considered it but did not think of any. We obviously would
need to revisit this if we do "note " header in the future, but
not for now. On the other hand, it might be useful to allow
--grep-header='^\(author\|committer\) Junio'
> 3. An alias (--who=foo?) for --author=foo --committer=foo. I believe
> this doesn't require boolean magic, since we default to OR.
This should be trivial to implement (two calls to
add_header_grep instead of one), but would it be useful? I
dunno.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] rev-list: fix segfault with --{author,committer,grep}
2006-09-18 0:42 [PATCH 3/3] revision traversal: --author, --committer, and --grep Junio C Hamano
2006-09-18 6:05 ` Jeff King
@ 2006-09-18 6:52 ` Jeff King
1 sibling, 0 replies; 5+ messages in thread
From: Jeff King @ 2006-09-18 6:52 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
We need to save the commit buffer if we're going to match against it.
Signed-off-by: Jeff King <peff@peff.net>
---
builtin-rev-list.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/builtin-rev-list.c b/builtin-rev-list.c
index 1f3333d..dbfee75 100644
--- a/builtin-rev-list.c
+++ b/builtin-rev-list.c
@@ -269,7 +269,9 @@ int cmd_rev_list(int argc, const char **
revs.diff)
usage(rev_list_usage);
- save_commit_buffer = revs.verbose_header;
+ save_commit_buffer = revs.verbose_header ||
+ revs.header_filter ||
+ revs.message_filter;
track_object_refs = 0;
if (bisect_list)
revs.limited = 1;
--
1.4.2.1.g9f1c-dirty
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 3/3] revision traversal: --author, --committer, and --grep.
2006-09-18 6:51 ` Junio C Hamano
@ 2006-09-18 17:07 ` Linus Torvalds
0 siblings, 0 replies; 5+ messages in thread
From: Linus Torvalds @ 2006-09-18 17:07 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Jeff King, git, Kai Blin
On Sun, 17 Sep 2006, Junio C Hamano wrote:
>
> I wanted to default it to left anchored, so this was somewhat
> deliberate, but this is probably subject to taste.
I know that I'd prefer a rule where
"--author=^Junio"
would result in the grep-pattern being "^author Junio", but without the
initial '^' it would be "^author .*Junio".
So something like this, perhaps? It allows the regular left anchoring
syntax ('^' at the start of a pattern), but defaults to the default grep
behaviour ("anywhere in the line").
Linus
---
diff --git a/revision.c b/revision.c
index 26dd418..bca1229 100644
--- a/revision.c
+++ b/revision.c
@@ -677,6 +677,7 @@ int handle_revision_arg(const char *arg,
static void add_header_grep(struct rev_info *revs, const char *field, const char *pattern)
{
char *pat;
+ const char *prefix;
int patlen, fldlen;
if (!revs->header_filter) {
@@ -689,8 +690,13 @@ static void add_header_grep(struct rev_i
fldlen = strlen(field);
patlen = strlen(pattern);
- pat = xmalloc(patlen + fldlen + 3);
- sprintf(pat, "^%s %s", field, pattern);
+ pat = xmalloc(patlen + fldlen + 10);
+ prefix = ".*";
+ if (*pattern == '^') {
+ prefix = "";
+ pattern++;
+ }
+ sprintf(pat, "^%s %s%s", field, prefix, pattern);
append_grep_pattern(revs->header_filter, pat,
"command line", 0, GREP_PATTERN);
}
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2006-09-18 17:08 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-09-18 0:42 [PATCH 3/3] revision traversal: --author, --committer, and --grep Junio C Hamano
2006-09-18 6:05 ` Jeff King
2006-09-18 6:51 ` Junio C Hamano
2006-09-18 17:07 ` Linus Torvalds
2006-09-18 6:52 ` [PATCH] rev-list: fix segfault with --{author,committer,grep} Jeff King
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).