git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "René Scharfe" <rene.scharfe@lsrfire.ath.cx>
To: Jeff King <peff@peff.net>, Junio C Hamano <gitster@pobox.com>
Cc: Paul Mackerras <paulus@samba.org>,
	Git Mailing List <git@vger.kernel.org>,
	Pierre Habouzit <madcoder@debian.org>,
	Johannes Schindelin <Johannes.Schindelin@gmx.de>
Subject: [PATCH 1/3] --pretty=format: parse commit message only once
Date: Sat, 10 Nov 2007 12:14:20 +0100	[thread overview]
Message-ID: <4735928C.3040009@lsrfire.ath.cx> (raw)
In-Reply-To: <20071110004635.GA14992@sigill.intra.peff.net>

As Jeff King pointed out, some placeholder expansions are related to
each other: the steps to calculate one go most of the way towards
calculating the other, too.

This patch makes format_commit_message() parse the commit message
only once, remembering the position of each item.  This speeds up
handling of format strings containing multiple placeholders from the
set %s, %a*, %c*, %e, %b.

Here are the timings for the git version in next.  The first one is
to estimate the overhead of the caching, the second one is taken
from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an
example of a format string found in the wild.  The times are the
fastest of three consecutive runs in each case:

$ time git log --pretty=format:%e >/dev/null

real    0m0.381s
user    0m0.340s
sys     0m0.024s

$ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null

real    0m0.623s
user    0m0.556s
sys     0m0.052s

And here the times with this patch:

$ time git log --pretty=format:%e >/dev/null

real    0m0.385s
user    0m0.332s
sys     0m0.040s

$ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null

real    0m0.563s
user    0m0.504s
sys     0m0.048s

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 pretty.c |  124 +++++++++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 82 insertions(+), 42 deletions(-)

diff --git a/pretty.c b/pretty.c
index 9fbd73f..0c2f83b 100644
--- a/pretty.c
+++ b/pretty.c
@@ -354,14 +354,67 @@ static void format_person_part(struct strbuf *sb, char part,
 	}
 }
 
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
-                               void *context)
+struct chunk {
+	size_t off;
+	size_t len;
+};
+
+struct format_commit_context {
+	const struct commit *commit;
+
+	/* These offsets are relative to the start of the commit message. */
+	int commit_header_parsed;
+	struct chunk subject;
+	struct chunk author;
+	struct chunk committer;
+	struct chunk encoding;
+	size_t body_off;
+};
+
+static void parse_commit_header(struct format_commit_context *context)
 {
-	const struct commit *commit = context;
-	struct commit_list *p;
+	const char *msg = context->commit->buffer;
 	int i;
 	enum { HEADER, SUBJECT, BODY } state;
+
+	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
+		int eol;
+		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
+			; /* do nothing */
+
+		if (state == SUBJECT) {
+			context->subject.off = i;
+			context->subject.len = eol - i;
+			i = eol;
+		}
+		if (i == eol) {
+			state++;
+			/* strip empty lines */
+			while (msg[eol + 1] == '\n')
+				eol++;
+		} else if (!prefixcmp(msg + i, "author ")) {
+			context->author.off = i + 7;
+			context->author.len = eol - i - 7;
+		} else if (!prefixcmp(msg + i, "committer ")) {
+			context->committer.off = i + 10;
+			context->committer.len = eol - i - 10;
+		} else if (!prefixcmp(msg + i, "encoding ")) {
+			context->encoding.off = i + 9;
+			context->encoding.len = eol - i - 9;
+		}
+		i = eol;
+	}
+	context->body_off = i;
+	context->commit_header_parsed = 1;
+}
+
+static void format_commit_item(struct strbuf *sb, const char *placeholder,
+                               void *context)
+{
+	struct format_commit_context *c = context;
+	const struct commit *commit = c->commit;
 	const char *msg = commit->buffer;
+	struct commit_list *p;
 
 	/* these are independent of the commit */
 	switch (placeholder[0]) {
@@ -429,45 +482,28 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 	}
 
 	/* For the rest we have to parse the commit header. */
-	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
-		int eol;
-		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
-			; /* do nothing */
+	if (!c->commit_header_parsed)
+		parse_commit_header(c);
 
-		if (state == SUBJECT) {
-			if (placeholder[0] == 's') {
-				strbuf_add(sb, msg + i, eol - i);
-				return;
-			}
-			i = eol;
-		}
-		if (i == eol) {
-			state++;
-			/* strip empty lines */
-			while (msg[eol + 1] == '\n')
-				eol++;
-		} else if (!prefixcmp(msg + i, "author ")) {
-			if (placeholder[0] == 'a') {
-				format_person_part(sb, placeholder[1],
-				                   msg + i + 7, eol - i - 7);
-				return;
-			}
-		} else if (!prefixcmp(msg + i, "committer ")) {
-			if (placeholder[0] == 'c') {
-				format_person_part(sb, placeholder[1],
-				                   msg + i + 10, eol - i - 10);
-				return;
-			}
-		} else if (!prefixcmp(msg + i, "encoding ")) {
-			if (placeholder[0] == 'e') {
-				strbuf_add(sb, msg + i + 9, eol - i - 9);
-				return;
-			}
-		}
-		i = eol;
+	switch (placeholder[0]) {
+	case 's':
+		strbuf_add(sb, msg + c->subject.off, c->subject.len);
+		return;
+	case 'a':
+		format_person_part(sb, placeholder[1],
+		                   msg + c->author.off, c->author.len);
+		return;
+	case 'c':
+		format_person_part(sb, placeholder[1],
+		                   msg + c->committer.off, c->committer.len);
+		return;
+	case 'e':
+		strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
+		return;
+	case 'b':
+		strbuf_addstr(sb, msg + c->body_off);
+		return;
 	}
-	if (msg[i] && placeholder[0] == 'b')	/* body */
-		strbuf_addstr(sb, msg + i);
 }
 
 void format_commit_message(const struct commit *commit,
@@ -505,7 +541,11 @@ void format_commit_message(const struct commit *commit,
 		"m",		/* left/right/bottom */
 		NULL
 	};
-	strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit);
+	struct format_commit_context context;
+
+	memset(&context, 0, sizeof(context));
+	context.commit = commit;
+	strbuf_expand(sb, format, placeholders, format_commit_item, &context);
 }
 
 static void pp_header(enum cmit_fmt fmt,
-- 
1.5.3.5.1651.g30bf

  parent reply	other threads:[~2007-11-10 11:14 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-09  0:49 [PATCH 2/2] --pretty=format: on-demand format expansion René Scharfe
2007-11-09  1:24 ` Johannes Schindelin
2007-11-09 21:13   ` René Scharfe
2007-11-09 22:18     ` Johannes Schindelin
2007-11-09 22:30       ` Junio C Hamano
2007-11-09  4:50 ` Jeff King
2007-11-09 23:16   ` René Scharfe
2007-11-10  0:31     ` Johannes Schindelin
2007-11-10  0:49       ` Jeff King
2007-11-10  0:46     ` Jeff King
2007-11-10 11:12       ` René Scharfe
2007-11-10 16:07         ` Johannes Schindelin
2007-11-10 16:24           ` René Scharfe
2007-11-10 20:36           ` Jeff King
2007-11-10 20:34         ` Jeff King
2007-11-11  8:13           ` Jeff King
2007-11-10 11:14       ` René Scharfe [this message]
2007-11-10 11:16       ` [PATCH 2/3] add strbuf_adddup() René Scharfe
2007-11-10 11:18       ` [PATCH 3/3] --format=pretty: avoid calculating expensive expansions twice René Scharfe
2007-11-11 10:29         ` Junio C Hamano
2007-11-09  4:52 ` [PATCH 2/2] --pretty=format: on-demand format expansion Jeff King
2007-11-09 23:20   ` René Scharfe
2007-11-10  0:51     ` Jeff King
2007-11-09 23:39 ` Paul Mackerras

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4735928C.3040009@lsrfire.ath.cx \
    --to=rene.scharfe@lsrfire.ath.cx \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=madcoder@debian.org \
    --cc=paulus@samba.org \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).