git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 3/3] revision traversal: --author, --committer, and --grep.
@ 2006-09-18  0:42 Junio C Hamano
  2006-09-18  6:05 ` Jeff King
  2006-09-18  6:52 ` [PATCH] rev-list: fix segfault with --{author,committer,grep} Jeff King
  0 siblings, 2 replies; 5+ messages in thread
From: Junio C Hamano @ 2006-09-18  0:42 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git, Kai Blin, Jeff King

This adds three options to setup_revisions(), which lets you
filter resulting commits by the author name, the committer name
and the log message with regexp.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/git-rev-list.txt |   11 ++++++
 revision.c                     |   76 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 86 insertions(+), 1 deletions(-)

diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt
index 28966ad..00a95e2 100644
--- a/Documentation/git-rev-list.txt
+++ b/Documentation/git-rev-list.txt
@@ -20,6 +20,7 @@ SYNOPSIS
 	     [ \--stdin ]
 	     [ \--topo-order ]
 	     [ \--parents ]
+	     [ \--(author|committer|grep)=<pattern> ]
 	     [ [\--objects | \--objects-edge] [ \--unpacked ] ]
 	     [ \--pretty | \--header ]
 	     [ \--bisect ]
@@ -154,6 +155,16 @@ limiting may be applied.
 
 	Limit the commits output to specified time range.
 
+--author='pattern', --committer='pattern'::
+
+	Limit the commits output to ones with author/committer
+	header lines that match the specified pattern.
+
+--grep='pattern'::
+
+	Limit the commits output to ones with log message that
+	matches the specified pattern.
+
 --remove-empty::
 
 	Stop when a given path disappears from the tree.
diff --git a/revision.c b/revision.c
index 8fda20d..19f7272 100644
--- a/revision.c
+++ b/revision.c
@@ -673,6 +673,40 @@ int handle_revision_arg(const char *arg,
 	return 0;
 }
 
+static void add_header_grep(struct rev_info *revs, const char *field, const char *pattern)
+{
+	char *pat;
+	int patlen, fldlen;
+
+	if (!revs->header_filter) {
+		struct grep_opt *opt = xcalloc(1, sizeof(*opt));
+		opt->status_only = 1;
+		opt->pattern_tail = &(opt->pattern_list);
+		opt->regflags = REG_NEWLINE;
+		revs->header_filter = opt;
+	}
+
+	fldlen = strlen(field);
+	patlen = strlen(pattern);
+	pat = xmalloc(patlen + fldlen + 3);
+	sprintf(pat, "^%s %s", field, pattern);
+	append_grep_pattern(revs->header_filter, pat,
+			    "command line", 0, GREP_PATTERN);
+}
+
+static void add_message_grep(struct rev_info *revs, const char *pattern)
+{
+	if (!revs->message_filter) {
+		struct grep_opt *opt = xcalloc(1, sizeof(*opt));
+		opt->status_only = 1;
+		opt->pattern_tail = &(opt->pattern_list);
+		opt->regflags = REG_NEWLINE;
+		revs->message_filter = opt;
+	}
+	append_grep_pattern(revs->message_filter, pattern,
+			    "command line", 0, GREP_PATTERN);
+}
+
 /*
  * Parse revision information, filling in the "rev_info" structure,
  * and removing the used arguments from the argument list.
@@ -896,6 +930,18 @@ int setup_revisions(int argc, const char
 				revs->relative_date = 1;
 				continue;
 			}
+			if (!strncmp(arg, "--author=", 9)) {
+				add_header_grep(revs, "author", arg+9);
+				continue;
+			}
+			if (!strncmp(arg, "--committer=", 12)) {
+				add_header_grep(revs, "committer", arg+12);
+				continue;
+			}
+			if (!strncmp(arg, "--grep=", 7)) {
+				add_message_grep(revs, arg+7);
+				continue;
+			}
 			opts = diff_opt_parse(&revs->diffopt, argv+i, argc-i);
 			if (opts > 0) {
 				revs->diff = 1;
@@ -956,6 +1002,11 @@ int setup_revisions(int argc, const char
 	if (diff_setup_done(&revs->diffopt) < 0)
 		die("diff_setup_done failed");
 
+	if (revs->header_filter)
+		compile_grep_patterns(revs->header_filter);
+	if (revs->message_filter)
+		compile_grep_patterns(revs->message_filter);
+
 	return left;
 }
 
@@ -1030,10 +1081,33 @@ static void mark_boundary_to_show(struct
 
 static int commit_match(struct commit *commit, struct rev_info *opt)
 {
+	char *header, *message;
+	unsigned long header_len, message_len;
+
 	if (!opt->header_filter && !opt->message_filter)
 		return 1;
 
-	/* match it here */
+	header = commit->buffer;
+	message = strstr(header, "\n\n");
+	if (message) {
+		message += 2;
+		header_len = message - header - 1;
+		message_len = strlen(message);
+	}
+	else {
+		header_len = strlen(header);
+		message = header;
+		message_len = 0;
+	}
+
+	if (opt->header_filter &&
+	    !grep_buffer(opt->header_filter, NULL, header, header_len))
+		return 0;
+
+	if (opt->message_filter &&
+	    !grep_buffer(opt->message_filter, NULL, message, message_len))
+		return 0;
+
 	return 1;
 }
 
-- 
1.4.2.1.g414e5

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/3] revision traversal: --author, --committer, and --grep.
  2006-09-18  0:42 [PATCH 3/3] revision traversal: --author, --committer, and --grep Junio C Hamano
@ 2006-09-18  6:05 ` Jeff King
  2006-09-18  6:51   ` Junio C Hamano
  2006-09-18  6:52 ` [PATCH] rev-list: fix segfault with --{author,committer,grep} Jeff King
  1 sibling, 1 reply; 5+ messages in thread
From: Jeff King @ 2006-09-18  6:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Linus Torvalds, git, Kai Blin

On Sun, Sep 17, 2006 at 05:42:26PM -0700, Junio C Hamano wrote:

> This adds three options to setup_revisions(), which lets you
> filter resulting commits by the author name, the committer name
> and the log message with regexp.

First of all, thanks for implementing this; I tried to use it the other
day (remembering the discussion and patches a few weeks ago) and was
disappointed to find it absent.

That being said, I find the matching style completely unintuitive. :)

To find --author=foo, your strategy is to stringify the header and grep
for "^author foo". As a user, my expectation was that you would
stringify the author field and grep for "foo".

The important difference is that your approach means that the user's
regex is implicitly anchored at the beginning of the field. Thus,
searching by email address does not work with --author=junkio, but
rather requires --author='.*junkio'.

Possible fixes:
  1. Match against "^<field>.*<regex>" (I haven't looked closely at the
     builtin grep implementation, but presumably '.' as usual does not
     include newline).
  2. Find <field>, and then feed grep_buffer only the contents of that
     line.
The second is what I feel that users will expect (at least what I
expected!), but is probably slightly less efficient (two greps instead
of one, but I doubt the difference would be significant). However, I
don't think there is a way with the first approach to explicitly request
a beginning-of-string anchor (i.e., "^Junio" in the second approach).

A few other thoughts:
  1. Case sensitivity? For convenience sake, it seems reasonable to
     match these fields without case sensitivity (what was the
     capitalization of A Large Angry SCM again? von Brand or Von Brand?
     etc). Should it be optional, and if so, how to specify it (a global
     command line option is probably not desired, as you might want
     case-sensitive --grep but case-insensitive --author). So we either
     need a "-i means the rest of the arguments are insensitive, +i
     means they are sensitive" option, or some syntax to specify it in
     the regex (perl uses (?i)).
  2. Is there any use to exposing the "header_grep" functionality with
     --grep-header? Is there anything worth grepping for besides
     author/committer? The general consensus on non-core headers in
     commit objects seemed to be "don't do it".
  3. An alias (--who=foo?) for --author=foo --committer=foo. I believe
     this doesn't require boolean magic, since we default to OR.

I'm happy to work on implementing any of the above if there's interest.

-Peff

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/3] revision traversal: --author, --committer, and --grep.
  2006-09-18  6:05 ` Jeff King
@ 2006-09-18  6:51   ` Junio C Hamano
  2006-09-18 17:07     ` Linus Torvalds
  0 siblings, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2006-09-18  6:51 UTC (permalink / raw)
  To: Jeff King; +Cc: Linus Torvalds, git, Kai Blin

Jeff King <peff@peff.net> writes:

>The important difference is that your approach means that the user's
>regex is implicitly anchored at the beginning of the field. Thus,
>searching by email address does not work with --author=junkio, but
>rather requires --author='.*junkio'.

I wanted to default it to left anchored, so this was somewhat
deliberate, but this is probably subject to taste.

I actually wanted to do only --grep-header and --grep-message
internally with --author and --committer as simple shorthands,
and that is why --author=<pattern> is just an internal synonym
for "author <pattern>".  I even considered doing without
distinction between header and body (meaning, a line in the
commit log message that happens to begin with "author " would
match --author string), which is technically incorrect but would
be more efficient.  I do not think occasional false hits would
matter much in practice.

Not making the match case insensitive was probably a mistake.
Looking for "--author=linus" would be easier to type.

One thing that I know is broken is the match at the tail; I am
actually thinking of doing something like this:

	sprintf(pat, "^%s %s [0-9][0-9]* [+-][0-9][0-9][0-9][0-9]$",
		field, pattern);

to avoid matching the timestamps.

> A few other thoughts:
>   1. Case sensitivity?

I do not think much is lost if we make the match always case
insensitive for this application.  It might be reasonable to to
default to case insensitive match but if there is a pattern that
has an uppercase use case sensitive match.

>  2. Is there any use to exposing the "header_grep" functionality with
>     --grep-header?

I considered it but did not think of any.  We obviously would
need to revisit this if we do "note " header in the future, but
not for now.  On the other hand, it might be useful to allow

	--grep-header='^\(author\|committer\) Junio'

>   3. An alias (--who=foo?) for --author=foo --committer=foo. I believe
>      this doesn't require boolean magic, since we default to OR.

This should be trivial to implement (two calls to
add_header_grep instead of one), but would it be useful?  I
dunno.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] rev-list: fix segfault with --{author,committer,grep}
  2006-09-18  0:42 [PATCH 3/3] revision traversal: --author, --committer, and --grep Junio C Hamano
  2006-09-18  6:05 ` Jeff King
@ 2006-09-18  6:52 ` Jeff King
  1 sibling, 0 replies; 5+ messages in thread
From: Jeff King @ 2006-09-18  6:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

We need to save the commit buffer if we're going to match against it.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin-rev-list.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/builtin-rev-list.c b/builtin-rev-list.c
index 1f3333d..dbfee75 100644
--- a/builtin-rev-list.c
+++ b/builtin-rev-list.c
@@ -269,7 +269,9 @@ int cmd_rev_list(int argc, const char **
 	    revs.diff)
 		usage(rev_list_usage);
 
-	save_commit_buffer = revs.verbose_header;
+	save_commit_buffer = revs.verbose_header ||
+		revs.header_filter ||
+		revs.message_filter;
 	track_object_refs = 0;
 	if (bisect_list)
 		revs.limited = 1;
-- 
1.4.2.1.g9f1c-dirty

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 3/3] revision traversal: --author, --committer, and --grep.
  2006-09-18  6:51   ` Junio C Hamano
@ 2006-09-18 17:07     ` Linus Torvalds
  0 siblings, 0 replies; 5+ messages in thread
From: Linus Torvalds @ 2006-09-18 17:07 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, git, Kai Blin



On Sun, 17 Sep 2006, Junio C Hamano wrote:
> 
> I wanted to default it to left anchored, so this was somewhat
> deliberate, but this is probably subject to taste.

I know that I'd prefer a rule where

 "--author=^Junio"

would result in the grep-pattern being "^author Junio", but without the 
initial '^' it would be "^author .*Junio".

So something like this, perhaps? It allows the regular left anchoring 
syntax ('^' at the start of a pattern), but defaults to the default grep 
behaviour ("anywhere in the line").

		Linus

---
diff --git a/revision.c b/revision.c
index 26dd418..bca1229 100644
--- a/revision.c
+++ b/revision.c
@@ -677,6 +677,7 @@ int handle_revision_arg(const char *arg,
 static void add_header_grep(struct rev_info *revs, const char *field, const char *pattern)
 {
 	char *pat;
+	const char *prefix;
 	int patlen, fldlen;
 
 	if (!revs->header_filter) {
@@ -689,8 +690,13 @@ static void add_header_grep(struct rev_i
 
 	fldlen = strlen(field);
 	patlen = strlen(pattern);
-	pat = xmalloc(patlen + fldlen + 3);
-	sprintf(pat, "^%s %s", field, pattern);
+	pat = xmalloc(patlen + fldlen + 10);
+	prefix = ".*";
+	if (*pattern == '^') {
+		prefix = "";
+		pattern++;
+	}
+	sprintf(pat, "^%s %s%s", field, prefix, pattern);
 	append_grep_pattern(revs->header_filter, pat,
 			    "command line", 0, GREP_PATTERN);
 }

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2006-09-18 17:08 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-09-18  0:42 [PATCH 3/3] revision traversal: --author, --committer, and --grep Junio C Hamano
2006-09-18  6:05 ` Jeff King
2006-09-18  6:51   ` Junio C Hamano
2006-09-18 17:07     ` Linus Torvalds
2006-09-18  6:52 ` [PATCH] rev-list: fix segfault with --{author,committer,grep} Jeff King

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).