git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Petr Baudis <pasky@suse.cz>
Cc: Giuseppe Bilotta <giuseppe.bilotta@gmail.com>, git@vger.kernel.org
Subject: Re: [PATCH] Teach "log -F --author=<match>" to behave better
Date: Thu, 04 Sep 2008 21:47:59 -0700	[thread overview]
Message-ID: <7vabenyz9c.fsf@gitster.siamese.dyndns.org> (raw)
In-Reply-To: <20080904140902.GY10544@machine.or.cz> (Petr Baudis's message of "Thu, 4 Sep 2008 16:09:02 +0200")

Actually, I changed my mind.  If we were to polish the grep machinery to
suit the log matching usage, I think doing it the way this patch does is
much better.

-- >8 --
log --author/--committer: really match only with name part

When we tried to find commits done by AUTHOR, the first implementation
tried to pattern match a line with "^author .*AUTHOR", which later was
enhanced to strip leading caret and look for "^author AUTHOR" when the
search pattern was anchored at the left end (i.e. --author="^AUTHOR").

This had a few problems:

 * When looking for fixed strings (e.g. "git log -F --author=x --grep=y"),
   the regexp internally used "^author .*x" would never match anything;

 * To match at the end (e.g. "git log --author='google.com>$'"), the
   generated regexp has to also match the trailing timestamp part the
   commit header lines have.  Also, in order to determine if the '$' at
   the end means "match at the end of the line" or just a literal dollar
   sign (probably backslash-quoted), we would need to parse the regexp
   ourselves.

An earlier alternative tried to make sure that a line matches "^author "
(to limit by field name) and the user supplied pattern at the same time.
While it solved the -F problem by introducing a special override for
matching the "^author ", it did not solve the trailing timestamp nor tail
match problem.  It also would have matched every commit if --author=author
was asked for, not because the author's email part had this string, but
because every commit header line that talks about the author begins with
that field name, regardleses of who wrote it.  

Instead of piling more hacks on top of hacks, this rethinks the grep
machinery that is used to look for strings in the commit header, and makes
sure that (1) field name matches literally at the beginning of the line,
followed by a SP, and (2) the user supplied pattern is matched against the
remainder of the line, excluding the trailing timestamp data.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 grep.c     |   41 +++++++++++++++++++++++++++++++++++++++++
 grep.h     |    3 +++
 revision.c |   15 +--------------
 3 files changed, 45 insertions(+), 14 deletions(-)

diff --git c/grep.c w/grep.c
index f67d671..44a7dc6 100644
--- c/grep.c
+++ w/grep.c
@@ -2,6 +2,20 @@
 #include "grep.h"
 #include "xdiff-interface.h"
 
+void append_header_grep_pattern(struct grep_opt *opt, const char *field, const char *pat)
+{
+	struct grep_pat *p = xcalloc(1, sizeof(*p));
+	p->pattern = pat;
+	p->origin = "header";
+	p->no = 0;
+	p->token = GREP_PATTERN_HEAD;
+	p->field = field;
+	p->field_len = strlen(field);
+	*opt->pattern_tail = p;
+	opt->pattern_tail = &p->next;
+	p->next = NULL;
+}
+
 void append_grep_pattern(struct grep_opt *opt, const char *pat,
 			 const char *origin, int no, enum grep_pat_token t)
 {
@@ -247,16 +261,41 @@ static int fixmatch(const char *pattern, char *line, regmatch_t *match)
 	}
 }
 
+static int strip_timestamp(char *bol, char **eol_p)
+{
+	char *eol = *eol_p;
+	int ch;
+
+	while (bol < --eol) {
+		if (*eol != '>')
+			continue;
+		*eol_p = ++eol;
+		ch = *eol;
+		*eol = '\0';
+		return ch;
+	}
+	return 0;
+}
+
 static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
 {
 	int hit = 0;
 	int at_true_bol = 1;
+	int saved_ch = 0;
 	regmatch_t pmatch[10];
 
 	if ((p->token != GREP_PATTERN) &&
 	    ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
 		return 0;
 
+	if (p->token == GREP_PATTERN_HEAD) {
+		/* author/committer fields in a commit object */
+		if (strncmp(bol, p->field, p->field_len) || bol[p->field_len] != ' ')
+			return 0;
+		bol += p->field_len + 1;
+		saved_ch = strip_timestamp(bol, &eol);
+	}
+
  again:
 	if (!opt->fixed) {
 		regex_t *exp = &p->regexp;
@@ -298,6 +337,8 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol
 			goto again;
 		}
 	}
+	if (p->token == GREP_PATTERN_HEAD && saved_ch)
+		*eol = saved_ch;
 	return hit;
 }
 
diff --git c/grep.h w/grep.h
index d252dd2..0454e50 100644
--- c/grep.h
+++ w/grep.h
@@ -23,6 +23,8 @@ struct grep_pat {
 	int no;
 	enum grep_pat_token token;
 	const char *pattern;
+	const char *field;
+	int field_len;
 	regex_t regexp;
 };
 
@@ -74,6 +76,7 @@ struct grep_opt {
 };
 
 extern void append_grep_pattern(struct grep_opt *opt, const char *pat, const char *origin, int no, enum grep_pat_token t);
+extern void append_header_grep_pattern(struct grep_opt *opt, const char *field, const char *pat);
 extern void compile_grep_patterns(struct grep_opt *opt);
 extern void free_grep_patterns(struct grep_opt *opt);
 extern int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size);
diff --git c/revision.c w/revision.c
index 36291b6..4092a11 100644
--- c/revision.c
+++ w/revision.c
@@ -955,20 +955,7 @@ static void add_grep(struct rev_info *revs, const char *ptn, enum grep_pat_token
 
 static void add_header_grep(struct rev_info *revs, const char *field, const char *pattern)
 {
-	char *pat;
-	const char *prefix;
-	int patlen, fldlen;
-
-	fldlen = strlen(field);
-	patlen = strlen(pattern);
-	pat = xmalloc(patlen + fldlen + 10);
-	prefix = ".*";
-	if (*pattern == '^') {
-		prefix = "";
-		pattern++;
-	}
-	sprintf(pat, "^%s %s%s", field, prefix, pattern);
-	add_grep(revs, pat, GREP_PATTERN_HEAD);
+	append_header_grep_pattern(&revs->grep_filter, field, pattern);
 }
 
 static void add_message_grep(struct rev_info *revs, const char *pattern)

  parent reply	other threads:[~2008-09-05  4:49 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-04  6:47 git rev-list --author/--committer b0rked with -F/--fixed-strings Giuseppe Bilotta
2008-09-04  7:12 ` Junio C Hamano
2008-09-04  8:31   ` [PATCH] Teach "log -F --author=<match>" to behave better Junio C Hamano
2008-09-04 13:33     ` Teemu Likonen
2008-09-04 19:06       ` Junio C Hamano
2008-09-04 19:25         ` Teemu Likonen
2008-09-05  1:04           ` Junio C Hamano
2008-09-04 13:56     ` Giuseppe Bilotta
2008-09-04 14:09     ` Petr Baudis
2008-09-04 19:34       ` Junio C Hamano
2008-09-05 12:35         ` --all-match Teemu Likonen
2008-09-05  4:47       ` Junio C Hamano [this message]
2008-09-04 13:46   ` git rev-list --author/--committer b0rked with -F/--fixed-strings Giuseppe Bilotta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vabenyz9c.fsf@gitster.siamese.dyndns.org \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=giuseppe.bilotta@gmail.com \
    --cc=pasky@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).