git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] Make user formatted commit listing less expensive
@ 2007-11-04 19:14 Johannes Schindelin
  2007-11-04 19:15 ` [PATCH 1/3] Split off the pretty print stuff into its own file Johannes Schindelin
                   ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-04 19:14 UTC (permalink / raw)
  To: git, Rene Scharfe, gitster

Hi,

this series of three splits off the formatting code from commit.c, adds 
the function interp_find_active() to interpolate.[ch], and then uses it in 
the obvious way.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/3] Split off the pretty print stuff into its own file
  2007-11-04 19:14 [PATCH 0/3] Make user formatted commit listing less expensive Johannes Schindelin
@ 2007-11-04 19:15 ` Johannes Schindelin
  2007-11-05 21:16   ` Junio C Hamano
  2007-11-04 19:15 ` [PATCH 2/3] interpolate.[ch]: Add a function to find which interpolations are active Johannes Schindelin
  2007-11-04 19:15 ` [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed Johannes Schindelin
  2 siblings, 1 reply; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-04 19:15 UTC (permalink / raw)
  To: git, Rene Scharfe, gitster


The file commit.c got quite large, but it does not have to be: the
code concerning pretty printing is pretty well contained.  In fact,
this commit just splits it off into pretty.c, leaving commit.c with
just 672 lines.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---

	I know, I suggested using "format-patch -C -C" for this case, but
	the response was correct in that it seems funny.

	AFAICT this is a verbatim move of two hunks from commit.c to
	pretty.c, and the usual #include mantra in front of the latter to 
	make it	compile.

 Makefile |    2 +-
 commit.c |  718 -------------------------------------------------------------
 pretty.c |  723 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 724 insertions(+), 719 deletions(-)
 create mode 100644 pretty.c

diff --git a/Makefile b/Makefile
index 19a48f5..4c5f864 100644
--- a/Makefile
+++ b/Makefile
@@ -299,7 +299,7 @@ DIFF_OBJS = \
 LIB_OBJS = \
 	blob.o commit.o connect.o csum-file.o cache-tree.o base85.o \
 	date.o diff-delta.o entry.o exec_cmd.o ident.o \
-	interpolate.o hash.o \
+	pretty.o interpolate.o hash.o \
 	lockfile.o \
 	patch-ids.o \
 	object.o pack-check.o pack-write.o patch-delta.o path.o pkt-line.o \
diff --git a/commit.c b/commit.c
index ab4eb8b..f074811 100644
--- a/commit.c
+++ b/commit.c
@@ -3,7 +3,6 @@
 #include "commit.h"
 #include "pkt-line.h"
 #include "utf8.h"
-#include "interpolate.h"
 #include "diff.h"
 #include "revision.h"
 
@@ -11,46 +10,6 @@ int save_commit_buffer = 1;
 
 const char *commit_type = "commit";
 
-static struct cmt_fmt_map {
-	const char *n;
-	size_t cmp_len;
-	enum cmit_fmt v;
-} cmt_fmts[] = {
-	{ "raw",	1,	CMIT_FMT_RAW },
-	{ "medium",	1,	CMIT_FMT_MEDIUM },
-	{ "short",	1,	CMIT_FMT_SHORT },
-	{ "email",	1,	CMIT_FMT_EMAIL },
-	{ "full",	5,	CMIT_FMT_FULL },
-	{ "fuller",	5,	CMIT_FMT_FULLER },
-	{ "oneline",	1,	CMIT_FMT_ONELINE },
-	{ "format:",	7,	CMIT_FMT_USERFORMAT},
-};
-
-static char *user_format;
-
-enum cmit_fmt get_commit_format(const char *arg)
-{
-	int i;
-
-	if (!arg || !*arg)
-		return CMIT_FMT_DEFAULT;
-	if (*arg == '=')
-		arg++;
-	if (!prefixcmp(arg, "format:")) {
-		if (user_format)
-			free(user_format);
-		user_format = xstrdup(arg + 7);
-		return CMIT_FMT_USERFORMAT;
-	}
-	for (i = 0; i < ARRAY_SIZE(cmt_fmts); i++) {
-		if (!strncmp(arg, cmt_fmts[i].n, cmt_fmts[i].cmp_len) &&
-		    !strncmp(arg, cmt_fmts[i].n, strlen(arg)))
-			return cmt_fmts[i].v;
-	}
-
-	die("invalid --pretty format: %s", arg);
-}
-
 static struct commit *check_commit(struct object *obj,
 				   const unsigned char *sha1,
 				   int quiet)
@@ -444,683 +403,6 @@ void clear_commit_marks(struct commit *commit, unsigned int mark)
 	}
 }
 
-/*
- * Generic support for pretty-printing the header
- */
-static int get_one_line(const char *msg)
-{
-	int ret = 0;
-
-	for (;;) {
-		char c = *msg++;
-		if (!c)
-			break;
-		ret++;
-		if (c == '\n')
-			break;
-	}
-	return ret;
-}
-
-/* High bit set, or ISO-2022-INT */
-int non_ascii(int ch)
-{
-	ch = (ch & 0xff);
-	return ((ch & 0x80) || (ch == 0x1b));
-}
-
-static int is_rfc2047_special(char ch)
-{
-	return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
-}
-
-static void add_rfc2047(struct strbuf *sb, const char *line, int len,
-		       const char *encoding)
-{
-	int i, last;
-
-	for (i = 0; i < len; i++) {
-		int ch = line[i];
-		if (non_ascii(ch))
-			goto needquote;
-		if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
-			goto needquote;
-	}
-	strbuf_add(sb, line, len);
-	return;
-
-needquote:
-	strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
-	strbuf_addf(sb, "=?%s?q?", encoding);
-	for (i = last = 0; i < len; i++) {
-		unsigned ch = line[i] & 0xFF;
-		/*
-		 * We encode ' ' using '=20' even though rfc2047
-		 * allows using '_' for readability.  Unfortunately,
-		 * many programs do not understand this and just
-		 * leave the underscore in place.
-		 */
-		if (is_rfc2047_special(ch) || ch == ' ') {
-			strbuf_add(sb, line + last, i - last);
-			strbuf_addf(sb, "=%02X", ch);
-			last = i + 1;
-		}
-	}
-	strbuf_add(sb, line + last, len - last);
-	strbuf_addstr(sb, "?=");
-}
-
-static void add_user_info(const char *what, enum cmit_fmt fmt, struct strbuf *sb,
-			 const char *line, enum date_mode dmode,
-			 const char *encoding)
-{
-	char *date;
-	int namelen;
-	unsigned long time;
-	int tz;
-	const char *filler = "    ";
-
-	if (fmt == CMIT_FMT_ONELINE)
-		return;
-	date = strchr(line, '>');
-	if (!date)
-		return;
-	namelen = ++date - line;
-	time = strtoul(date, &date, 10);
-	tz = strtol(date, NULL, 10);
-
-	if (fmt == CMIT_FMT_EMAIL) {
-		char *name_tail = strchr(line, '<');
-		int display_name_length;
-		if (!name_tail)
-			return;
-		while (line < name_tail && isspace(name_tail[-1]))
-			name_tail--;
-		display_name_length = name_tail - line;
-		filler = "";
-		strbuf_addstr(sb, "From: ");
-		add_rfc2047(sb, line, display_name_length, encoding);
-		strbuf_add(sb, name_tail, namelen - display_name_length);
-		strbuf_addch(sb, '\n');
-	} else {
-		strbuf_addf(sb, "%s: %.*s%.*s\n", what,
-			      (fmt == CMIT_FMT_FULLER) ? 4 : 0,
-			      filler, namelen, line);
-	}
-	switch (fmt) {
-	case CMIT_FMT_MEDIUM:
-		strbuf_addf(sb, "Date:   %s\n", show_date(time, tz, dmode));
-		break;
-	case CMIT_FMT_EMAIL:
-		strbuf_addf(sb, "Date: %s\n", show_date(time, tz, DATE_RFC2822));
-		break;
-	case CMIT_FMT_FULLER:
-		strbuf_addf(sb, "%sDate: %s\n", what, show_date(time, tz, dmode));
-		break;
-	default:
-		/* notin' */
-		break;
-	}
-}
-
-static int is_empty_line(const char *line, int *len_p)
-{
-	int len = *len_p;
-	while (len && isspace(line[len-1]))
-		len--;
-	*len_p = len;
-	return !len;
-}
-
-static void add_merge_info(enum cmit_fmt fmt, struct strbuf *sb,
-			const struct commit *commit, int abbrev)
-{
-	struct commit_list *parent = commit->parents;
-
-	if ((fmt == CMIT_FMT_ONELINE) || (fmt == CMIT_FMT_EMAIL) ||
-	    !parent || !parent->next)
-		return;
-
-	strbuf_addstr(sb, "Merge:");
-
-	while (parent) {
-		struct commit *p = parent->item;
-		const char *hex = NULL;
-		const char *dots;
-		if (abbrev)
-			hex = find_unique_abbrev(p->object.sha1, abbrev);
-		if (!hex)
-			hex = sha1_to_hex(p->object.sha1);
-		dots = (abbrev && strlen(hex) != 40) ?  "..." : "";
-		parent = parent->next;
-
-		strbuf_addf(sb, " %s%s", hex, dots);
-	}
-	strbuf_addch(sb, '\n');
-}
-
-static char *get_header(const struct commit *commit, const char *key)
-{
-	int key_len = strlen(key);
-	const char *line = commit->buffer;
-
-	for (;;) {
-		const char *eol = strchr(line, '\n'), *next;
-
-		if (line == eol)
-			return NULL;
-		if (!eol) {
-			eol = line + strlen(line);
-			next = NULL;
-		} else
-			next = eol + 1;
-		if (eol - line > key_len &&
-		    !strncmp(line, key, key_len) &&
-		    line[key_len] == ' ') {
-			return xmemdupz(line + key_len + 1, eol - line - key_len - 1);
-		}
-		line = next;
-	}
-}
-
-static char *replace_encoding_header(char *buf, const char *encoding)
-{
-	struct strbuf tmp;
-	size_t start, len;
-	char *cp = buf;
-
-	/* guess if there is an encoding header before a \n\n */
-	while (strncmp(cp, "encoding ", strlen("encoding "))) {
-		cp = strchr(cp, '\n');
-		if (!cp || *++cp == '\n')
-			return buf;
-	}
-	start = cp - buf;
-	cp = strchr(cp, '\n');
-	if (!cp)
-		return buf; /* should not happen but be defensive */
-	len = cp + 1 - (buf + start);
-
-	strbuf_init(&tmp, 0);
-	strbuf_attach(&tmp, buf, strlen(buf), strlen(buf) + 1);
-	if (is_encoding_utf8(encoding)) {
-		/* we have re-coded to UTF-8; drop the header */
-		strbuf_remove(&tmp, start, len);
-	} else {
-		/* just replaces XXXX in 'encoding XXXX\n' */
-		strbuf_splice(&tmp, start + strlen("encoding "),
-					  len - strlen("encoding \n"),
-					  encoding, strlen(encoding));
-	}
-	return strbuf_detach(&tmp, NULL);
-}
-
-static char *logmsg_reencode(const struct commit *commit,
-			     const char *output_encoding)
-{
-	static const char *utf8 = "utf-8";
-	const char *use_encoding;
-	char *encoding;
-	char *out;
-
-	if (!*output_encoding)
-		return NULL;
-	encoding = get_header(commit, "encoding");
-	use_encoding = encoding ? encoding : utf8;
-	if (!strcmp(use_encoding, output_encoding))
-		if (encoding) /* we'll strip encoding header later */
-			out = xstrdup(commit->buffer);
-		else
-			return NULL; /* nothing to do */
-	else
-		out = reencode_string(commit->buffer,
-				      output_encoding, use_encoding);
-	if (out)
-		out = replace_encoding_header(out, output_encoding);
-
-	free(encoding);
-	return out;
-}
-
-static void fill_person(struct interp *table, const char *msg, int len)
-{
-	int start, end, tz = 0;
-	unsigned long date;
-	char *ep;
-
-	/* parse name */
-	for (end = 0; end < len && msg[end] != '<'; end++)
-		; /* do nothing */
-	start = end + 1;
-	while (end > 0 && isspace(msg[end - 1]))
-		end--;
-	table[0].value = xmemdupz(msg, end);
-
-	if (start >= len)
-		return;
-
-	/* parse email */
-	for (end = start + 1; end < len && msg[end] != '>'; end++)
-		; /* do nothing */
-
-	if (end >= len)
-		return;
-
-	table[1].value = xmemdupz(msg + start, end - start);
-
-	/* parse date */
-	for (start = end + 1; start < len && isspace(msg[start]); start++)
-		; /* do nothing */
-	if (start >= len)
-		return;
-	date = strtoul(msg + start, &ep, 10);
-	if (msg + start == ep)
-		return;
-
-	table[5].value = xmemdupz(msg + start, ep - (msg + start));
-
-	/* parse tz */
-	for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
-		; /* do nothing */
-	if (start + 1 < len) {
-		tz = strtoul(msg + start + 1, NULL, 10);
-		if (msg[start] == '-')
-			tz = -tz;
-	}
-
-	interp_set_entry(table, 2, show_date(date, tz, DATE_NORMAL));
-	interp_set_entry(table, 3, show_date(date, tz, DATE_RFC2822));
-	interp_set_entry(table, 4, show_date(date, tz, DATE_RELATIVE));
-	interp_set_entry(table, 6, show_date(date, tz, DATE_ISO8601));
-}
-
-void format_commit_message(const struct commit *commit,
-                           const void *format, struct strbuf *sb)
-{
-	struct interp table[] = {
-		{ "%H" },	/* commit hash */
-		{ "%h" },	/* abbreviated commit hash */
-		{ "%T" },	/* tree hash */
-		{ "%t" },	/* abbreviated tree hash */
-		{ "%P" },	/* parent hashes */
-		{ "%p" },	/* abbreviated parent hashes */
-		{ "%an" },	/* author name */
-		{ "%ae" },	/* author email */
-		{ "%ad" },	/* author date */
-		{ "%aD" },	/* author date, RFC2822 style */
-		{ "%ar" },	/* author date, relative */
-		{ "%at" },	/* author date, UNIX timestamp */
-		{ "%ai" },	/* author date, ISO 8601 */
-		{ "%cn" },	/* committer name */
-		{ "%ce" },	/* committer email */
-		{ "%cd" },	/* committer date */
-		{ "%cD" },	/* committer date, RFC2822 style */
-		{ "%cr" },	/* committer date, relative */
-		{ "%ct" },	/* committer date, UNIX timestamp */
-		{ "%ci" },	/* committer date, ISO 8601 */
-		{ "%e" },	/* encoding */
-		{ "%s" },	/* subject */
-		{ "%b" },	/* body */
-		{ "%Cred" },	/* red */
-		{ "%Cgreen" },	/* green */
-		{ "%Cblue" },	/* blue */
-		{ "%Creset" },	/* reset color */
-		{ "%n" },	/* newline */
-		{ "%m" },	/* left/right/bottom */
-	};
-	enum interp_index {
-		IHASH = 0, IHASH_ABBREV,
-		ITREE, ITREE_ABBREV,
-		IPARENTS, IPARENTS_ABBREV,
-		IAUTHOR_NAME, IAUTHOR_EMAIL,
-		IAUTHOR_DATE, IAUTHOR_DATE_RFC2822, IAUTHOR_DATE_RELATIVE,
-		IAUTHOR_TIMESTAMP, IAUTHOR_ISO8601,
-		ICOMMITTER_NAME, ICOMMITTER_EMAIL,
-		ICOMMITTER_DATE, ICOMMITTER_DATE_RFC2822,
-		ICOMMITTER_DATE_RELATIVE, ICOMMITTER_TIMESTAMP,
-		ICOMMITTER_ISO8601,
-		IENCODING,
-		ISUBJECT,
-		IBODY,
-		IRED, IGREEN, IBLUE, IRESET_COLOR,
-		INEWLINE,
-		ILEFT_RIGHT,
-	};
-	struct commit_list *p;
-	char parents[1024];
-	unsigned long len;
-	int i;
-	enum { HEADER, SUBJECT, BODY } state;
-	const char *msg = commit->buffer;
-
-	if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table))
-		die("invalid interp table!");
-
-	/* these are independent of the commit */
-	interp_set_entry(table, IRED, "\033[31m");
-	interp_set_entry(table, IGREEN, "\033[32m");
-	interp_set_entry(table, IBLUE, "\033[34m");
-	interp_set_entry(table, IRESET_COLOR, "\033[m");
-	interp_set_entry(table, INEWLINE, "\n");
-
-	/* these depend on the commit */
-	if (!commit->object.parsed)
-		parse_object(commit->object.sha1);
-	interp_set_entry(table, IHASH, sha1_to_hex(commit->object.sha1));
-	interp_set_entry(table, IHASH_ABBREV,
-			find_unique_abbrev(commit->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, ITREE, sha1_to_hex(commit->tree->object.sha1));
-	interp_set_entry(table, ITREE_ABBREV,
-			find_unique_abbrev(commit->tree->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, ILEFT_RIGHT,
-			 (commit->object.flags & BOUNDARY)
-			 ? "-"
-			 : (commit->object.flags & SYMMETRIC_LEFT)
-			 ? "<"
-			 : ">");
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			sha1_to_hex(p->item->object.sha1));
-	interp_set_entry(table, IPARENTS, parents + 1);
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			find_unique_abbrev(p->item->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
-
-	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
-		int eol;
-		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
-			; /* do nothing */
-
-		if (state == SUBJECT) {
-			table[ISUBJECT].value = xmemdupz(msg + i, eol - i);
-			i = eol;
-		}
-		if (i == eol) {
-			state++;
-			/* strip empty lines */
-			while (msg[eol + 1] == '\n')
-				eol++;
-		} else if (!prefixcmp(msg + i, "author "))
-			fill_person(table + IAUTHOR_NAME,
-					msg + i + 7, eol - i - 7);
-		else if (!prefixcmp(msg + i, "committer "))
-			fill_person(table + ICOMMITTER_NAME,
-					msg + i + 10, eol - i - 10);
-		else if (!prefixcmp(msg + i, "encoding "))
-			table[IENCODING].value =
-				xmemdupz(msg + i + 9, eol - i - 9);
-		i = eol;
-	}
-	if (msg[i])
-		table[IBODY].value = xstrdup(msg + i);
-
-	len = interpolate(sb->buf + sb->len, strbuf_avail(sb),
-				format, table, ARRAY_SIZE(table));
-	if (len > strbuf_avail(sb)) {
-		strbuf_grow(sb, len);
-		interpolate(sb->buf + sb->len, strbuf_avail(sb) + 1,
-					format, table, ARRAY_SIZE(table));
-	}
-	strbuf_setlen(sb, sb->len + len);
-	interp_clear_table(table, ARRAY_SIZE(table));
-}
-
-static void pp_header(enum cmit_fmt fmt,
-		      int abbrev,
-		      enum date_mode dmode,
-		      const char *encoding,
-		      const struct commit *commit,
-		      const char **msg_p,
-		      struct strbuf *sb)
-{
-	int parents_shown = 0;
-
-	for (;;) {
-		const char *line = *msg_p;
-		int linelen = get_one_line(*msg_p);
-
-		if (!linelen)
-			return;
-		*msg_p += linelen;
-
-		if (linelen == 1)
-			/* End of header */
-			return;
-
-		if (fmt == CMIT_FMT_RAW) {
-			strbuf_add(sb, line, linelen);
-			continue;
-		}
-
-		if (!memcmp(line, "parent ", 7)) {
-			if (linelen != 48)
-				die("bad parent line in commit");
-			continue;
-		}
-
-		if (!parents_shown) {
-			struct commit_list *parent;
-			int num;
-			for (parent = commit->parents, num = 0;
-			     parent;
-			     parent = parent->next, num++)
-				;
-			/* with enough slop */
-			strbuf_grow(sb, num * 50 + 20);
-			add_merge_info(fmt, sb, commit, abbrev);
-			parents_shown = 1;
-		}
-
-		/*
-		 * MEDIUM == DEFAULT shows only author with dates.
-		 * FULL shows both authors but not dates.
-		 * FULLER shows both authors and dates.
-		 */
-		if (!memcmp(line, "author ", 7)) {
-			strbuf_grow(sb, linelen + 80);
-			add_user_info("Author", fmt, sb, line + 7, dmode, encoding);
-		}
-		if (!memcmp(line, "committer ", 10) &&
-		    (fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER)) {
-			strbuf_grow(sb, linelen + 80);
-			add_user_info("Commit", fmt, sb, line + 10, dmode, encoding);
-		}
-	}
-}
-
-static void pp_title_line(enum cmit_fmt fmt,
-			  const char **msg_p,
-			  struct strbuf *sb,
-			  const char *subject,
-			  const char *after_subject,
-			  const char *encoding,
-			  int plain_non_ascii)
-{
-	struct strbuf title;
-
-	strbuf_init(&title, 80);
-
-	for (;;) {
-		const char *line = *msg_p;
-		int linelen = get_one_line(line);
-
-		*msg_p += linelen;
-		if (!linelen || is_empty_line(line, &linelen))
-			break;
-
-		strbuf_grow(&title, linelen + 2);
-		if (title.len) {
-			if (fmt == CMIT_FMT_EMAIL) {
-				strbuf_addch(&title, '\n');
-			}
-			strbuf_addch(&title, ' ');
-		}
-		strbuf_add(&title, line, linelen);
-	}
-
-	strbuf_grow(sb, title.len + 1024);
-	if (subject) {
-		strbuf_addstr(sb, subject);
-		add_rfc2047(sb, title.buf, title.len, encoding);
-	} else {
-		strbuf_addbuf(sb, &title);
-	}
-	strbuf_addch(sb, '\n');
-
-	if (plain_non_ascii) {
-		const char *header_fmt =
-			"MIME-Version: 1.0\n"
-			"Content-Type: text/plain; charset=%s\n"
-			"Content-Transfer-Encoding: 8bit\n";
-		strbuf_addf(sb, header_fmt, encoding);
-	}
-	if (after_subject) {
-		strbuf_addstr(sb, after_subject);
-	}
-	if (fmt == CMIT_FMT_EMAIL) {
-		strbuf_addch(sb, '\n');
-	}
-	strbuf_release(&title);
-}
-
-static void pp_remainder(enum cmit_fmt fmt,
-			 const char **msg_p,
-			 struct strbuf *sb,
-			 int indent)
-{
-	int first = 1;
-	for (;;) {
-		const char *line = *msg_p;
-		int linelen = get_one_line(line);
-		*msg_p += linelen;
-
-		if (!linelen)
-			break;
-
-		if (is_empty_line(line, &linelen)) {
-			if (first)
-				continue;
-			if (fmt == CMIT_FMT_SHORT)
-				break;
-		}
-		first = 0;
-
-		strbuf_grow(sb, linelen + indent + 20);
-		if (indent) {
-			memset(sb->buf + sb->len, ' ', indent);
-			strbuf_setlen(sb, sb->len + indent);
-		}
-		strbuf_add(sb, line, linelen);
-		strbuf_addch(sb, '\n');
-	}
-}
-
-void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
-				  struct strbuf *sb, int abbrev,
-				  const char *subject, const char *after_subject,
-				  enum date_mode dmode, int plain_non_ascii)
-{
-	unsigned long beginning_of_body;
-	int indent = 4;
-	const char *msg = commit->buffer;
-	char *reencoded;
-	const char *encoding;
-
-	if (fmt == CMIT_FMT_USERFORMAT) {
-		format_commit_message(commit, user_format, sb);
-		return;
-	}
-
-	encoding = (git_log_output_encoding
-		    ? git_log_output_encoding
-		    : git_commit_encoding);
-	if (!encoding)
-		encoding = "utf-8";
-	reencoded = logmsg_reencode(commit, encoding);
-	if (reencoded) {
-		msg = reencoded;
-	}
-
-	if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
-		indent = 0;
-
-	/* After-subject is used to pass in Content-Type: multipart
-	 * MIME header; in that case we do not have to do the
-	 * plaintext content type even if the commit message has
-	 * non 7-bit ASCII character.  Otherwise, check if we need
-	 * to say this is not a 7-bit ASCII.
-	 */
-	if (fmt == CMIT_FMT_EMAIL && !after_subject) {
-		int i, ch, in_body;
-
-		for (in_body = i = 0; (ch = msg[i]); i++) {
-			if (!in_body) {
-				/* author could be non 7-bit ASCII but
-				 * the log may be so; skip over the
-				 * header part first.
-				 */
-				if (ch == '\n' && msg[i+1] == '\n')
-					in_body = 1;
-			}
-			else if (non_ascii(ch)) {
-				plain_non_ascii = 1;
-				break;
-			}
-		}
-	}
-
-	pp_header(fmt, abbrev, dmode, encoding, commit, &msg, sb);
-	if (fmt != CMIT_FMT_ONELINE && !subject) {
-		strbuf_addch(sb, '\n');
-	}
-
-	/* Skip excess blank lines at the beginning of body, if any... */
-	for (;;) {
-		int linelen = get_one_line(msg);
-		int ll = linelen;
-		if (!linelen)
-			break;
-		if (!is_empty_line(msg, &ll))
-			break;
-		msg += linelen;
-	}
-
-	/* These formats treat the title line specially. */
-	if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
-		pp_title_line(fmt, &msg, sb, subject,
-			      after_subject, encoding, plain_non_ascii);
-
-	beginning_of_body = sb->len;
-	if (fmt != CMIT_FMT_ONELINE)
-		pp_remainder(fmt, &msg, sb, indent);
-	strbuf_rtrim(sb);
-
-	/* Make sure there is an EOLN for the non-oneline case */
-	if (fmt != CMIT_FMT_ONELINE)
-		strbuf_addch(sb, '\n');
-
-	/*
-	 * The caller may append additional body text in e-mail
-	 * format.  Make sure we did not strip the blank line
-	 * between the header and the body.
-	 */
-	if (fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
-		strbuf_addch(sb, '\n');
-	free(reencoded);
-}
-
 struct commit *pop_commit(struct commit_list **stack)
 {
 	struct commit_list *top = *stack;
diff --git a/pretty.c b/pretty.c
new file mode 100644
index 0000000..490cede
--- /dev/null
+++ b/pretty.c
@@ -0,0 +1,723 @@
+#include "cache.h"
+#include "commit.h"
+#include "interpolate.h"
+#include "utf8.h"
+#include "diff.h"
+#include "revision.h"
+
+static struct cmt_fmt_map {
+	const char *n;
+	size_t cmp_len;
+	enum cmit_fmt v;
+} cmt_fmts[] = {
+	{ "raw",	1,	CMIT_FMT_RAW },
+	{ "medium",	1,	CMIT_FMT_MEDIUM },
+	{ "short",	1,	CMIT_FMT_SHORT },
+	{ "email",	1,	CMIT_FMT_EMAIL },
+	{ "full",	5,	CMIT_FMT_FULL },
+	{ "fuller",	5,	CMIT_FMT_FULLER },
+	{ "oneline",	1,	CMIT_FMT_ONELINE },
+	{ "format:",	7,	CMIT_FMT_USERFORMAT},
+};
+
+static char *user_format;
+
+enum cmit_fmt get_commit_format(const char *arg)
+{
+	int i;
+
+	if (!arg || !*arg)
+		return CMIT_FMT_DEFAULT;
+	if (*arg == '=')
+		arg++;
+	if (!prefixcmp(arg, "format:")) {
+		if (user_format)
+			free(user_format);
+		user_format = xstrdup(arg + 7);
+		return CMIT_FMT_USERFORMAT;
+	}
+	for (i = 0; i < ARRAY_SIZE(cmt_fmts); i++) {
+		if (!strncmp(arg, cmt_fmts[i].n, cmt_fmts[i].cmp_len) &&
+		    !strncmp(arg, cmt_fmts[i].n, strlen(arg)))
+			return cmt_fmts[i].v;
+	}
+
+	die("invalid --pretty format: %s", arg);
+}
+
+/*
+ * Generic support for pretty-printing the header
+ */
+static int get_one_line(const char *msg)
+{
+	int ret = 0;
+
+	for (;;) {
+		char c = *msg++;
+		if (!c)
+			break;
+		ret++;
+		if (c == '\n')
+			break;
+	}
+	return ret;
+}
+
+/* High bit set, or ISO-2022-INT */
+int non_ascii(int ch)
+{
+	ch = (ch & 0xff);
+	return ((ch & 0x80) || (ch == 0x1b));
+}
+
+static int is_rfc2047_special(char ch)
+{
+	return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
+}
+
+static void add_rfc2047(struct strbuf *sb, const char *line, int len,
+		       const char *encoding)
+{
+	int i, last;
+
+	for (i = 0; i < len; i++) {
+		int ch = line[i];
+		if (non_ascii(ch))
+			goto needquote;
+		if ((i + 1 < len) && (ch == '=' && line[i+1] == '?'))
+			goto needquote;
+	}
+	strbuf_add(sb, line, len);
+	return;
+
+needquote:
+	strbuf_grow(sb, len * 3 + strlen(encoding) + 100);
+	strbuf_addf(sb, "=?%s?q?", encoding);
+	for (i = last = 0; i < len; i++) {
+		unsigned ch = line[i] & 0xFF;
+		/*
+		 * We encode ' ' using '=20' even though rfc2047
+		 * allows using '_' for readability.  Unfortunately,
+		 * many programs do not understand this and just
+		 * leave the underscore in place.
+		 */
+		if (is_rfc2047_special(ch) || ch == ' ') {
+			strbuf_add(sb, line + last, i - last);
+			strbuf_addf(sb, "=%02X", ch);
+			last = i + 1;
+		}
+	}
+	strbuf_add(sb, line + last, len - last);
+	strbuf_addstr(sb, "?=");
+}
+
+static void add_user_info(const char *what, enum cmit_fmt fmt, struct strbuf *sb,
+			 const char *line, enum date_mode dmode,
+			 const char *encoding)
+{
+	char *date;
+	int namelen;
+	unsigned long time;
+	int tz;
+	const char *filler = "    ";
+
+	if (fmt == CMIT_FMT_ONELINE)
+		return;
+	date = strchr(line, '>');
+	if (!date)
+		return;
+	namelen = ++date - line;
+	time = strtoul(date, &date, 10);
+	tz = strtol(date, NULL, 10);
+
+	if (fmt == CMIT_FMT_EMAIL) {
+		char *name_tail = strchr(line, '<');
+		int display_name_length;
+		if (!name_tail)
+			return;
+		while (line < name_tail && isspace(name_tail[-1]))
+			name_tail--;
+		display_name_length = name_tail - line;
+		filler = "";
+		strbuf_addstr(sb, "From: ");
+		add_rfc2047(sb, line, display_name_length, encoding);
+		strbuf_add(sb, name_tail, namelen - display_name_length);
+		strbuf_addch(sb, '\n');
+	} else {
+		strbuf_addf(sb, "%s: %.*s%.*s\n", what,
+			      (fmt == CMIT_FMT_FULLER) ? 4 : 0,
+			      filler, namelen, line);
+	}
+	switch (fmt) {
+	case CMIT_FMT_MEDIUM:
+		strbuf_addf(sb, "Date:   %s\n", show_date(time, tz, dmode));
+		break;
+	case CMIT_FMT_EMAIL:
+		strbuf_addf(sb, "Date: %s\n", show_date(time, tz, DATE_RFC2822));
+		break;
+	case CMIT_FMT_FULLER:
+		strbuf_addf(sb, "%sDate: %s\n", what, show_date(time, tz, dmode));
+		break;
+	default:
+		/* notin' */
+		break;
+	}
+}
+
+static int is_empty_line(const char *line, int *len_p)
+{
+	int len = *len_p;
+	while (len && isspace(line[len-1]))
+		len--;
+	*len_p = len;
+	return !len;
+}
+
+static void add_merge_info(enum cmit_fmt fmt, struct strbuf *sb,
+			const struct commit *commit, int abbrev)
+{
+	struct commit_list *parent = commit->parents;
+
+	if ((fmt == CMIT_FMT_ONELINE) || (fmt == CMIT_FMT_EMAIL) ||
+	    !parent || !parent->next)
+		return;
+
+	strbuf_addstr(sb, "Merge:");
+
+	while (parent) {
+		struct commit *p = parent->item;
+		const char *hex = NULL;
+		const char *dots;
+		if (abbrev)
+			hex = find_unique_abbrev(p->object.sha1, abbrev);
+		if (!hex)
+			hex = sha1_to_hex(p->object.sha1);
+		dots = (abbrev && strlen(hex) != 40) ?  "..." : "";
+		parent = parent->next;
+
+		strbuf_addf(sb, " %s%s", hex, dots);
+	}
+	strbuf_addch(sb, '\n');
+}
+
+static char *get_header(const struct commit *commit, const char *key)
+{
+	int key_len = strlen(key);
+	const char *line = commit->buffer;
+
+	for (;;) {
+		const char *eol = strchr(line, '\n'), *next;
+
+		if (line == eol)
+			return NULL;
+		if (!eol) {
+			eol = line + strlen(line);
+			next = NULL;
+		} else
+			next = eol + 1;
+		if (eol - line > key_len &&
+		    !strncmp(line, key, key_len) &&
+		    line[key_len] == ' ') {
+			return xmemdupz(line + key_len + 1, eol - line - key_len - 1);
+		}
+		line = next;
+	}
+}
+
+static char *replace_encoding_header(char *buf, const char *encoding)
+{
+	struct strbuf tmp;
+	size_t start, len;
+	char *cp = buf;
+
+	/* guess if there is an encoding header before a \n\n */
+	while (strncmp(cp, "encoding ", strlen("encoding "))) {
+		cp = strchr(cp, '\n');
+		if (!cp || *++cp == '\n')
+			return buf;
+	}
+	start = cp - buf;
+	cp = strchr(cp, '\n');
+	if (!cp)
+		return buf; /* should not happen but be defensive */
+	len = cp + 1 - (buf + start);
+
+	strbuf_init(&tmp, 0);
+	strbuf_attach(&tmp, buf, strlen(buf), strlen(buf) + 1);
+	if (is_encoding_utf8(encoding)) {
+		/* we have re-coded to UTF-8; drop the header */
+		strbuf_remove(&tmp, start, len);
+	} else {
+		/* just replaces XXXX in 'encoding XXXX\n' */
+		strbuf_splice(&tmp, start + strlen("encoding "),
+					  len - strlen("encoding \n"),
+					  encoding, strlen(encoding));
+	}
+	return strbuf_detach(&tmp, NULL);
+}
+
+static char *logmsg_reencode(const struct commit *commit,
+			     const char *output_encoding)
+{
+	static const char *utf8 = "utf-8";
+	const char *use_encoding;
+	char *encoding;
+	char *out;
+
+	if (!*output_encoding)
+		return NULL;
+	encoding = get_header(commit, "encoding");
+	use_encoding = encoding ? encoding : utf8;
+	if (!strcmp(use_encoding, output_encoding))
+		if (encoding) /* we'll strip encoding header later */
+			out = xstrdup(commit->buffer);
+		else
+			return NULL; /* nothing to do */
+	else
+		out = reencode_string(commit->buffer,
+				      output_encoding, use_encoding);
+	if (out)
+		out = replace_encoding_header(out, output_encoding);
+
+	free(encoding);
+	return out;
+}
+
+static void fill_person(struct interp *table, const char *msg, int len)
+{
+	int start, end, tz = 0;
+	unsigned long date;
+	char *ep;
+
+	/* parse name */
+	for (end = 0; end < len && msg[end] != '<'; end++)
+		; /* do nothing */
+	start = end + 1;
+	while (end > 0 && isspace(msg[end - 1]))
+		end--;
+	table[0].value = xmemdupz(msg, end);
+
+	if (start >= len)
+		return;
+
+	/* parse email */
+	for (end = start + 1; end < len && msg[end] != '>'; end++)
+		; /* do nothing */
+
+	if (end >= len)
+		return;
+
+	table[1].value = xmemdupz(msg + start, end - start);
+
+	/* parse date */
+	for (start = end + 1; start < len && isspace(msg[start]); start++)
+		; /* do nothing */
+	if (start >= len)
+		return;
+	date = strtoul(msg + start, &ep, 10);
+	if (msg + start == ep)
+		return;
+
+	table[5].value = xmemdupz(msg + start, ep - (msg + start));
+
+	/* parse tz */
+	for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
+		; /* do nothing */
+	if (start + 1 < len) {
+		tz = strtoul(msg + start + 1, NULL, 10);
+		if (msg[start] == '-')
+			tz = -tz;
+	}
+
+	interp_set_entry(table, 2, show_date(date, tz, DATE_NORMAL));
+	interp_set_entry(table, 3, show_date(date, tz, DATE_RFC2822));
+	interp_set_entry(table, 4, show_date(date, tz, DATE_RELATIVE));
+	interp_set_entry(table, 6, show_date(date, tz, DATE_ISO8601));
+}
+
+void format_commit_message(const struct commit *commit,
+                           const void *format, struct strbuf *sb)
+{
+	struct interp table[] = {
+		{ "%H" },	/* commit hash */
+		{ "%h" },	/* abbreviated commit hash */
+		{ "%T" },	/* tree hash */
+		{ "%t" },	/* abbreviated tree hash */
+		{ "%P" },	/* parent hashes */
+		{ "%p" },	/* abbreviated parent hashes */
+		{ "%an" },	/* author name */
+		{ "%ae" },	/* author email */
+		{ "%ad" },	/* author date */
+		{ "%aD" },	/* author date, RFC2822 style */
+		{ "%ar" },	/* author date, relative */
+		{ "%at" },	/* author date, UNIX timestamp */
+		{ "%ai" },	/* author date, ISO 8601 */
+		{ "%cn" },	/* committer name */
+		{ "%ce" },	/* committer email */
+		{ "%cd" },	/* committer date */
+		{ "%cD" },	/* committer date, RFC2822 style */
+		{ "%cr" },	/* committer date, relative */
+		{ "%ct" },	/* committer date, UNIX timestamp */
+		{ "%ci" },	/* committer date, ISO 8601 */
+		{ "%e" },	/* encoding */
+		{ "%s" },	/* subject */
+		{ "%b" },	/* body */
+		{ "%Cred" },	/* red */
+		{ "%Cgreen" },	/* green */
+		{ "%Cblue" },	/* blue */
+		{ "%Creset" },	/* reset color */
+		{ "%n" },	/* newline */
+		{ "%m" },	/* left/right/bottom */
+	};
+	enum interp_index {
+		IHASH = 0, IHASH_ABBREV,
+		ITREE, ITREE_ABBREV,
+		IPARENTS, IPARENTS_ABBREV,
+		IAUTHOR_NAME, IAUTHOR_EMAIL,
+		IAUTHOR_DATE, IAUTHOR_DATE_RFC2822, IAUTHOR_DATE_RELATIVE,
+		IAUTHOR_TIMESTAMP, IAUTHOR_ISO8601,
+		ICOMMITTER_NAME, ICOMMITTER_EMAIL,
+		ICOMMITTER_DATE, ICOMMITTER_DATE_RFC2822,
+		ICOMMITTER_DATE_RELATIVE, ICOMMITTER_TIMESTAMP,
+		ICOMMITTER_ISO8601,
+		IENCODING,
+		ISUBJECT,
+		IBODY,
+		IRED, IGREEN, IBLUE, IRESET_COLOR,
+		INEWLINE,
+		ILEFT_RIGHT,
+	};
+	struct commit_list *p;
+	char parents[1024];
+	unsigned long len;
+	int i;
+	enum { HEADER, SUBJECT, BODY } state;
+	const char *msg = commit->buffer;
+
+	if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table))
+		die("invalid interp table!");
+
+	/* these are independent of the commit */
+	interp_set_entry(table, IRED, "\033[31m");
+	interp_set_entry(table, IGREEN, "\033[32m");
+	interp_set_entry(table, IBLUE, "\033[34m");
+	interp_set_entry(table, IRESET_COLOR, "\033[m");
+	interp_set_entry(table, INEWLINE, "\n");
+
+	/* these depend on the commit */
+	if (!commit->object.parsed)
+		parse_object(commit->object.sha1);
+	interp_set_entry(table, IHASH, sha1_to_hex(commit->object.sha1));
+	interp_set_entry(table, IHASH_ABBREV,
+			find_unique_abbrev(commit->object.sha1,
+				DEFAULT_ABBREV));
+	interp_set_entry(table, ITREE, sha1_to_hex(commit->tree->object.sha1));
+	interp_set_entry(table, ITREE_ABBREV,
+			find_unique_abbrev(commit->tree->object.sha1,
+				DEFAULT_ABBREV));
+	interp_set_entry(table, ILEFT_RIGHT,
+			 (commit->object.flags & BOUNDARY)
+			 ? "-"
+			 : (commit->object.flags & SYMMETRIC_LEFT)
+			 ? "<"
+			 : ">");
+
+	parents[1] = 0;
+	for (i = 0, p = commit->parents;
+			p && i < sizeof(parents) - 1;
+			p = p->next)
+		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
+			sha1_to_hex(p->item->object.sha1));
+	interp_set_entry(table, IPARENTS, parents + 1);
+
+	parents[1] = 0;
+	for (i = 0, p = commit->parents;
+			p && i < sizeof(parents) - 1;
+			p = p->next)
+		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
+			find_unique_abbrev(p->item->object.sha1,
+				DEFAULT_ABBREV));
+	interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
+
+	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
+		int eol;
+		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
+			; /* do nothing */
+
+		if (state == SUBJECT) {
+			table[ISUBJECT].value = xmemdupz(msg + i, eol - i);
+			i = eol;
+		}
+		if (i == eol) {
+			state++;
+			/* strip empty lines */
+			while (msg[eol + 1] == '\n')
+				eol++;
+		} else if (!prefixcmp(msg + i, "author "))
+			fill_person(table + IAUTHOR_NAME,
+					msg + i + 7, eol - i - 7);
+		else if (!prefixcmp(msg + i, "committer "))
+			fill_person(table + ICOMMITTER_NAME,
+					msg + i + 10, eol - i - 10);
+		else if (!prefixcmp(msg + i, "encoding "))
+			table[IENCODING].value =
+				xmemdupz(msg + i + 9, eol - i - 9);
+		i = eol;
+	}
+	if (msg[i])
+		table[IBODY].value = xstrdup(msg + i);
+
+	len = interpolate(sb->buf + sb->len, strbuf_avail(sb),
+				format, table, ARRAY_SIZE(table));
+	if (len > strbuf_avail(sb)) {
+		strbuf_grow(sb, len);
+		interpolate(sb->buf + sb->len, strbuf_avail(sb) + 1,
+					format, table, ARRAY_SIZE(table));
+	}
+	strbuf_setlen(sb, sb->len + len);
+	interp_clear_table(table, ARRAY_SIZE(table));
+}
+
+static void pp_header(enum cmit_fmt fmt,
+		      int abbrev,
+		      enum date_mode dmode,
+		      const char *encoding,
+		      const struct commit *commit,
+		      const char **msg_p,
+		      struct strbuf *sb)
+{
+	int parents_shown = 0;
+
+	for (;;) {
+		const char *line = *msg_p;
+		int linelen = get_one_line(*msg_p);
+
+		if (!linelen)
+			return;
+		*msg_p += linelen;
+
+		if (linelen == 1)
+			/* End of header */
+			return;
+
+		if (fmt == CMIT_FMT_RAW) {
+			strbuf_add(sb, line, linelen);
+			continue;
+		}
+
+		if (!memcmp(line, "parent ", 7)) {
+			if (linelen != 48)
+				die("bad parent line in commit");
+			continue;
+		}
+
+		if (!parents_shown) {
+			struct commit_list *parent;
+			int num;
+			for (parent = commit->parents, num = 0;
+			     parent;
+			     parent = parent->next, num++)
+				;
+			/* with enough slop */
+			strbuf_grow(sb, num * 50 + 20);
+			add_merge_info(fmt, sb, commit, abbrev);
+			parents_shown = 1;
+		}
+
+		/*
+		 * MEDIUM == DEFAULT shows only author with dates.
+		 * FULL shows both authors but not dates.
+		 * FULLER shows both authors and dates.
+		 */
+		if (!memcmp(line, "author ", 7)) {
+			strbuf_grow(sb, linelen + 80);
+			add_user_info("Author", fmt, sb, line + 7, dmode, encoding);
+		}
+		if (!memcmp(line, "committer ", 10) &&
+		    (fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER)) {
+			strbuf_grow(sb, linelen + 80);
+			add_user_info("Commit", fmt, sb, line + 10, dmode, encoding);
+		}
+	}
+}
+
+static void pp_title_line(enum cmit_fmt fmt,
+			  const char **msg_p,
+			  struct strbuf *sb,
+			  const char *subject,
+			  const char *after_subject,
+			  const char *encoding,
+			  int plain_non_ascii)
+{
+	struct strbuf title;
+
+	strbuf_init(&title, 80);
+
+	for (;;) {
+		const char *line = *msg_p;
+		int linelen = get_one_line(line);
+
+		*msg_p += linelen;
+		if (!linelen || is_empty_line(line, &linelen))
+			break;
+
+		strbuf_grow(&title, linelen + 2);
+		if (title.len) {
+			if (fmt == CMIT_FMT_EMAIL) {
+				strbuf_addch(&title, '\n');
+			}
+			strbuf_addch(&title, ' ');
+		}
+		strbuf_add(&title, line, linelen);
+	}
+
+	strbuf_grow(sb, title.len + 1024);
+	if (subject) {
+		strbuf_addstr(sb, subject);
+		add_rfc2047(sb, title.buf, title.len, encoding);
+	} else {
+		strbuf_addbuf(sb, &title);
+	}
+	strbuf_addch(sb, '\n');
+
+	if (plain_non_ascii) {
+		const char *header_fmt =
+			"MIME-Version: 1.0\n"
+			"Content-Type: text/plain; charset=%s\n"
+			"Content-Transfer-Encoding: 8bit\n";
+		strbuf_addf(sb, header_fmt, encoding);
+	}
+	if (after_subject) {
+		strbuf_addstr(sb, after_subject);
+	}
+	if (fmt == CMIT_FMT_EMAIL) {
+		strbuf_addch(sb, '\n');
+	}
+	strbuf_release(&title);
+}
+
+static void pp_remainder(enum cmit_fmt fmt,
+			 const char **msg_p,
+			 struct strbuf *sb,
+			 int indent)
+{
+	int first = 1;
+	for (;;) {
+		const char *line = *msg_p;
+		int linelen = get_one_line(line);
+		*msg_p += linelen;
+
+		if (!linelen)
+			break;
+
+		if (is_empty_line(line, &linelen)) {
+			if (first)
+				continue;
+			if (fmt == CMIT_FMT_SHORT)
+				break;
+		}
+		first = 0;
+
+		strbuf_grow(sb, linelen + indent + 20);
+		if (indent) {
+			memset(sb->buf + sb->len, ' ', indent);
+			strbuf_setlen(sb, sb->len + indent);
+		}
+		strbuf_add(sb, line, linelen);
+		strbuf_addch(sb, '\n');
+	}
+}
+
+void pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
+				  struct strbuf *sb, int abbrev,
+				  const char *subject, const char *after_subject,
+				  enum date_mode dmode, int plain_non_ascii)
+{
+	unsigned long beginning_of_body;
+	int indent = 4;
+	const char *msg = commit->buffer;
+	char *reencoded;
+	const char *encoding;
+
+	if (fmt == CMIT_FMT_USERFORMAT) {
+		format_commit_message(commit, user_format, sb);
+		return;
+	}
+
+	encoding = (git_log_output_encoding
+		    ? git_log_output_encoding
+		    : git_commit_encoding);
+	if (!encoding)
+		encoding = "utf-8";
+	reencoded = logmsg_reencode(commit, encoding);
+	if (reencoded) {
+		msg = reencoded;
+	}
+
+	if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
+		indent = 0;
+
+	/* After-subject is used to pass in Content-Type: multipart
+	 * MIME header; in that case we do not have to do the
+	 * plaintext content type even if the commit message has
+	 * non 7-bit ASCII character.  Otherwise, check if we need
+	 * to say this is not a 7-bit ASCII.
+	 */
+	if (fmt == CMIT_FMT_EMAIL && !after_subject) {
+		int i, ch, in_body;
+
+		for (in_body = i = 0; (ch = msg[i]); i++) {
+			if (!in_body) {
+				/* author could be non 7-bit ASCII but
+				 * the log may be so; skip over the
+				 * header part first.
+				 */
+				if (ch == '\n' && msg[i+1] == '\n')
+					in_body = 1;
+			}
+			else if (non_ascii(ch)) {
+				plain_non_ascii = 1;
+				break;
+			}
+		}
+	}
+
+	pp_header(fmt, abbrev, dmode, encoding, commit, &msg, sb);
+	if (fmt != CMIT_FMT_ONELINE && !subject) {
+		strbuf_addch(sb, '\n');
+	}
+
+	/* Skip excess blank lines at the beginning of body, if any... */
+	for (;;) {
+		int linelen = get_one_line(msg);
+		int ll = linelen;
+		if (!linelen)
+			break;
+		if (!is_empty_line(msg, &ll))
+			break;
+		msg += linelen;
+	}
+
+	/* These formats treat the title line specially. */
+	if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
+		pp_title_line(fmt, &msg, sb, subject,
+			      after_subject, encoding, plain_non_ascii);
+
+	beginning_of_body = sb->len;
+	if (fmt != CMIT_FMT_ONELINE)
+		pp_remainder(fmt, &msg, sb, indent);
+	strbuf_rtrim(sb);
+
+	/* Make sure there is an EOLN for the non-oneline case */
+	if (fmt != CMIT_FMT_ONELINE)
+		strbuf_addch(sb, '\n');
+
+	/*
+	 * The caller may append additional body text in e-mail
+	 * format.  Make sure we did not strip the blank line
+	 * between the header and the body.
+	 */
+	if (fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body)
+		strbuf_addch(sb, '\n');
+	free(reencoded);
+}
-- 
1.5.3.5.1549.g91a3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/3] interpolate.[ch]: Add a function to find which interpolations are active.
  2007-11-04 19:14 [PATCH 0/3] Make user formatted commit listing less expensive Johannes Schindelin
  2007-11-04 19:15 ` [PATCH 1/3] Split off the pretty print stuff into its own file Johannes Schindelin
@ 2007-11-04 19:15 ` Johannes Schindelin
  2007-11-04 19:15 ` [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed Johannes Schindelin
  2 siblings, 0 replies; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-04 19:15 UTC (permalink / raw)
  To: git, Rene Scharfe, gitster


Some substitutions require pretty expensive operations.  So it make
sense to find out which are needed to begin with.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 interpolate.c |   20 ++++++++++++++++++++
 interpolate.h |    2 ++
 2 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/interpolate.c b/interpolate.c
index 6ef53f2..80eeb36 100644
--- a/interpolate.c
+++ b/interpolate.c
@@ -102,3 +102,23 @@ unsigned long interpolate(char *result, unsigned long reslen,
 		*dest = '\0';
 	return newlen;
 }
+
+char *interp_find_active(const char *orig,
+		const struct interp *interps, int ninterps)
+{
+	char *result = xcalloc(1, ninterps);
+	char c;
+	int i;
+
+	while ((c = *(orig++)))
+		if (c == '%')
+			/* Try to match an interpolation string. */
+			for (i = 0; i < ninterps; i++)
+				if (!prefixcmp(orig, interps[i].name + 1)) {
+					result[i] = 1;
+					orig += strlen(interps[i].name + 1);
+					break;
+				}
+
+	return result;
+}
diff --git a/interpolate.h b/interpolate.h
index 77407e6..2d197c5 100644
--- a/interpolate.h
+++ b/interpolate.h
@@ -22,5 +22,7 @@ extern void interp_clear_table(struct interp *table, int ninterps);
 extern unsigned long interpolate(char *result, unsigned long reslen,
 				 const char *orig,
 				 const struct interp *interps, int ninterps);
+extern char *interp_find_active(const char *orig,
+				const struct interp *interps, int ninterps);
 
 #endif /* INTERPOLATE_H */
-- 
1.5.3.5.1549.g91a3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-04 19:14 [PATCH 0/3] Make user formatted commit listing less expensive Johannes Schindelin
  2007-11-04 19:15 ` [PATCH 1/3] Split off the pretty print stuff into its own file Johannes Schindelin
  2007-11-04 19:15 ` [PATCH 2/3] interpolate.[ch]: Add a function to find which interpolations are active Johannes Schindelin
@ 2007-11-04 19:15 ` Johannes Schindelin
  2007-11-05 19:51   ` Junio C Hamano
  2 siblings, 1 reply; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-04 19:15 UTC (permalink / raw)
  To: git, Rene Scharfe, gitster


Use the new function interp_find_active() to avoid calculating the
unique hash names, and other things, when they are not even asked for.

Unfortunately, we cannot reuse the result of that function, which
would be cleaner: there are more users than just git log.  Most
notably, git-archive with "$Format:...$" substitution.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
	So I found another reason why the function has to be called
	everytime.  But this reason appeals to me much more.

	Originally, I wanted to do this differently, by providing a
	function which generates the substitutions, but the header
	parsing makes that infeasible.

 pretty.c |   55 ++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/pretty.c b/pretty.c
index 490cede..241e91c 100644
--- a/pretty.c
+++ b/pretty.c
@@ -393,6 +393,7 @@ void format_commit_message(const struct commit *commit,
 	int i;
 	enum { HEADER, SUBJECT, BODY } state;
 	const char *msg = commit->buffer;
+	char *active = interp_find_active(format, table, ARRAY_SIZE(table));
 
 	if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table))
 		die("invalid interp table!");
@@ -407,12 +408,18 @@ void format_commit_message(const struct commit *commit,
 	/* these depend on the commit */
 	if (!commit->object.parsed)
 		parse_object(commit->object.sha1);
-	interp_set_entry(table, IHASH, sha1_to_hex(commit->object.sha1));
-	interp_set_entry(table, IHASH_ABBREV,
+	if (active[IHASH])
+		interp_set_entry(table, IHASH,
+				sha1_to_hex(commit->object.sha1));
+	if (active[IHASH_ABBREV])
+		interp_set_entry(table, IHASH_ABBREV,
 			find_unique_abbrev(commit->object.sha1,
 				DEFAULT_ABBREV));
-	interp_set_entry(table, ITREE, sha1_to_hex(commit->tree->object.sha1));
-	interp_set_entry(table, ITREE_ABBREV,
+	if (active[ITREE])
+		interp_set_entry(table, ITREE,
+				sha1_to_hex(commit->tree->object.sha1));
+	if (active[ITREE_ABBREV])
+		interp_set_entry(table, ITREE_ABBREV,
 			find_unique_abbrev(commit->tree->object.sha1,
 				DEFAULT_ABBREV));
 	interp_set_entry(table, ILEFT_RIGHT,
@@ -422,22 +429,27 @@ void format_commit_message(const struct commit *commit,
 			 ? "<"
 			 : ">");
 
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			sha1_to_hex(p->item->object.sha1));
-	interp_set_entry(table, IPARENTS, parents + 1);
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			find_unique_abbrev(p->item->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
+	if (active[IPARENTS]) {
+		parents[1] = 0;
+		for (i = 0, p = commit->parents;
+				p && i < sizeof(parents) - 1;
+				p = p->next)
+			i += snprintf(parents + i, sizeof(parents) - i - 1,
+				" %s", sha1_to_hex(p->item->object.sha1));
+		interp_set_entry(table, IPARENTS, parents + 1);
+	}
+
+	if (active[IPARENTS_ABBREV]) {
+		parents[1] = 0;
+		for (i = 0, p = commit->parents;
+				p && i < sizeof(parents) - 1;
+				p = p->next)
+			i += snprintf(parents + i, sizeof(parents) - i - 1,
+				" %s",
+				find_unique_abbrev(p->item->object.sha1,
+					DEFAULT_ABBREV));
+		interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
+	}
 
 	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
 		int eol;
@@ -464,7 +476,7 @@ void format_commit_message(const struct commit *commit,
 				xmemdupz(msg + i + 9, eol - i - 9);
 		i = eol;
 	}
-	if (msg[i])
+	if (active[IBODY] && msg[i])
 		table[IBODY].value = xstrdup(msg + i);
 
 	len = interpolate(sb->buf + sb->len, strbuf_avail(sb),
@@ -476,6 +488,7 @@ void format_commit_message(const struct commit *commit,
 	}
 	strbuf_setlen(sb, sb->len + len);
 	interp_clear_table(table, ARRAY_SIZE(table));
+	free(active);
 }
 
 static void pp_header(enum cmit_fmt fmt,
-- 
1.5.3.5.1549.g91a3

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-04 19:15 ` [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed Johannes Schindelin
@ 2007-11-05 19:51   ` Junio C Hamano
  2007-11-05 20:21     ` René Scharfe
  0 siblings, 1 reply; 24+ messages in thread
From: Junio C Hamano @ 2007-11-05 19:51 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Rene Scharfe, gitster

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Unfortunately, we cannot reuse the result of that function, which
> would be cleaner: there are more users than just git log.  Most
> notably, git-archive with "$Format:...$" substitution.

That makes sense.


> diff --git a/pretty.c b/pretty.c
> index 490cede..241e91c 100644
> --- a/pretty.c
> +++ b/pretty.c
> @@ -393,6 +393,7 @@ void format_commit_message(const struct commit *commit,
>  	int i;
>  	enum { HEADER, SUBJECT, BODY } state;
>  	const char *msg = commit->buffer;
> +	char *active = interp_find_active(format, table, ARRAY_SIZE(table));
> ...
> +	if (active[IHASH])
> +		interp_set_entry(table, IHASH,
> +				sha1_to_hex(commit->object.sha1));
> +	if (active[IHASH_ABBREV])
> +		interp_set_entry(table, IHASH_ABBREV,
>  			find_unique_abbrev(commit->object.sha1,
>  				DEFAULT_ABBREV));

Instead of allocating a separate array and freeing at the end,
wouldn't it make more sense to have a bitfield that records what
is used by the format string inside the array elements?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-05 19:51   ` Junio C Hamano
@ 2007-11-05 20:21     ` René Scharfe
  2007-11-05 20:25       ` Jon Loeliger
                         ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: René Scharfe @ 2007-11-05 20:21 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Schindelin, git

Junio C Hamano schrieb:
> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
>> Unfortunately, we cannot reuse the result of that function, which
>> would be cleaner: there are more users than just git log.  Most
>> notably, git-archive with "$Format:...$" substitution.
> 
> That makes sense.
> 
> 
>> diff --git a/pretty.c b/pretty.c
>> index 490cede..241e91c 100644
>> --- a/pretty.c
>> +++ b/pretty.c
>> @@ -393,6 +393,7 @@ void format_commit_message(const struct commit *commit,
>>  	int i;
>>  	enum { HEADER, SUBJECT, BODY } state;
>>  	const char *msg = commit->buffer;
>> +	char *active = interp_find_active(format, table, ARRAY_SIZE(table));
>> ...
>> +	if (active[IHASH])
>> +		interp_set_entry(table, IHASH,
>> +				sha1_to_hex(commit->object.sha1));
>> +	if (active[IHASH_ABBREV])
>> +		interp_set_entry(table, IHASH_ABBREV,
>>  			find_unique_abbrev(commit->object.sha1,
>>  				DEFAULT_ABBREV));
> 
> Instead of allocating a separate array and freeing at the end,
> wouldn't it make more sense to have a bitfield that records what
> is used by the format string inside the array elements?

How about (ab)using the value field?  Let interp_find_active() mark
unneeded entries with NULL, and the rest with some cookie.  All table
entries with non-NULL values need to be initialized.  interp_set_entry()
needs to be aware of this cookie, as it mustn't free() it.  The cookie
could be the address of a static char* in interpolate.c.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-05 20:21     ` René Scharfe
@ 2007-11-05 20:25       ` Jon Loeliger
  2007-11-05 23:53       ` Johannes Schindelin
  2007-11-06  1:06       ` Junio C Hamano
  2 siblings, 0 replies; 24+ messages in thread
From: Jon Loeliger @ 2007-11-05 20:25 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, Johannes Schindelin, Git List

On Mon, 2007-11-05 at 14:21, René Scharfe wrote:

> How about (ab)using the value field?  Let interp_find_active() mark
> unneeded entries with NULL, and the rest with some cookie.  All table
> entries with non-NULL values need to be initialized.  interp_set_entry()
> needs to be aware of this cookie, as it mustn't free() it.  The cookie
> could be the address of a static char* in interpolate.c.

Or adding a "flags/properties" field and having it
note things like static entries, active, etc...?

jdl

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 1/3] Split off the pretty print stuff into its own file
  2007-11-04 19:15 ` [PATCH 1/3] Split off the pretty print stuff into its own file Johannes Schindelin
@ 2007-11-05 21:16   ` Junio C Hamano
  0 siblings, 0 replies; 24+ messages in thread
From: Junio C Hamano @ 2007-11-05 21:16 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Rene Scharfe, gitster

I'll take the [1/3] to 'master'.  I will see how discussion
goes about the rest on the list for now.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-05 20:21     ` René Scharfe
  2007-11-05 20:25       ` Jon Loeliger
@ 2007-11-05 23:53       ` Johannes Schindelin
  2007-11-06  1:06       ` Junio C Hamano
  2 siblings, 0 replies; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-05 23:53 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, git

Hi,

On Mon, 5 Nov 2007, Ren? Scharfe wrote:

> Junio C Hamano schrieb:
> > Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> > 
> >> Unfortunately, we cannot reuse the result of that function, which
> >> would be cleaner: there are more users than just git log.  Most
> >> notably, git-archive with "$Format:...$" substitution.
> > 
> > That makes sense.
> > 
> > 
> >> diff --git a/pretty.c b/pretty.c
> >> index 490cede..241e91c 100644
> >> --- a/pretty.c
> >> +++ b/pretty.c
> >> @@ -393,6 +393,7 @@ void format_commit_message(const struct commit *commit,
> >>  	int i;
> >>  	enum { HEADER, SUBJECT, BODY } state;
> >>  	const char *msg = commit->buffer;
> >> +	char *active = interp_find_active(format, table, ARRAY_SIZE(table));
> >> ...
> >> +	if (active[IHASH])
> >> +		interp_set_entry(table, IHASH,
> >> +				sha1_to_hex(commit->object.sha1));
> >> +	if (active[IHASH_ABBREV])
> >> +		interp_set_entry(table, IHASH_ABBREV,
> >>  			find_unique_abbrev(commit->object.sha1,
> >>  				DEFAULT_ABBREV));
> > 
> > Instead of allocating a separate array and freeing at the end,
> > wouldn't it make more sense to have a bitfield that records what
> > is used by the format string inside the array elements?
> 
> How about (ab)using the value field?  Let interp_find_active() mark
> unneeded entries with NULL, and the rest with some cookie.  All table
> entries with non-NULL values need to be initialized.  interp_set_entry()
> needs to be aware of this cookie, as it mustn't free() it.  The cookie
> could be the address of a static char* in interpolate.c.

Yeah, something like this on top of my earlier patch (and obviously the 
corresponding change from "if (active[IHASH])" to
"if (table[IHASH].value)"):

---

 interpolate.c |   10 ++++------
 interpolate.h |    2 +-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/interpolate.c b/interpolate.c
index 80eeb36..05a22e1 100644
--- a/interpolate.c
+++ b/interpolate.c
@@ -5,13 +5,14 @@
 #include "git-compat-util.h"
 #include "interpolate.h"
 
+static const char *empty_value = "";
 
 void interp_set_entry(struct interp *table, int slot, const char *value)
 {
 	char *oldval = table[slot].value;
 	char *newval = NULL;
 
-	if (oldval)
+	if (oldval && oldval != empty_value)
 		free(oldval);
 
 	if (value)
@@ -103,10 +104,9 @@ unsigned long interpolate(char *result, unsigned long reslen,
 	return newlen;
 }
 
-char *interp_find_active(const char *orig,
+void interp_find_active(const char *orig,
 		const struct interp *interps, int ninterps)
 {
-	char *result = xcalloc(1, ninterps);
 	char c;
 	int i;
 
@@ -115,10 +115,8 @@ char *interp_find_active(const char *orig,
 			/* Try to match an interpolation string. */
 			for (i = 0; i < ninterps; i++)
 				if (!prefixcmp(orig, interps[i].name + 1)) {
-					result[i] = 1;
+					interps[i].value = empty_value;
 					orig += strlen(interps[i].name + 1);
 					break;
 				}
-
-	return result;
 }
diff --git a/interpolate.h b/interpolate.h
index 2d197c5..19b7ebe 100644
--- a/interpolate.h
+++ b/interpolate.h
@@ -22,7 +22,7 @@ extern void interp_clear_table(struct interp *table, int ninterps);
 extern unsigned long interpolate(char *result, unsigned long reslen,
 				 const char *orig,
 				 const struct interp *interps, int ninterps);
-extern char *interp_find_active(const char *orig,
+extern void interp_find_active(const char *orig,
 				const struct interp *interps, int ninterps);
 
 #endif /* INTERPOLATE_H */


Hmm?

Ciao,
Dscho

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-05 20:21     ` René Scharfe
  2007-11-05 20:25       ` Jon Loeliger
  2007-11-05 23:53       ` Johannes Schindelin
@ 2007-11-06  1:06       ` Junio C Hamano
  2007-11-06 22:31         ` René Scharfe
  2 siblings, 1 reply; 24+ messages in thread
From: Junio C Hamano @ 2007-11-06  1:06 UTC (permalink / raw)
  To: René Scharfe; +Cc: Johannes Schindelin, git

René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> Junio C Hamano schrieb:
> ...
>> Instead of allocating a separate array and freeing at the end,
>> wouldn't it make more sense to have a bitfield that records what
>> is used by the format string inside the array elements?
>
> How about (ab)using the value field?  Let interp_find_active() mark
> unneeded entries with NULL, and the rest with some cookie.  All table
> entries with non-NULL values need to be initialized.  interp_set_entry()
> needs to be aware of this cookie, as it mustn't free() it.  The cookie
> could be the address of a static char* in interpolate.c.

Sorry, where is this aversion to making the struct a bit larger
coming from?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-06  1:06       ` Junio C Hamano
@ 2007-11-06 22:31         ` René Scharfe
  2007-11-06 23:17           ` René Scharfe
  2007-11-06 23:36           ` Johannes Schindelin
  0 siblings, 2 replies; 24+ messages in thread
From: René Scharfe @ 2007-11-06 22:31 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Schindelin, git

Junio C Hamano schrieb:
> René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:
> 
>> Junio C Hamano schrieb: ...
>>> Instead of allocating a separate array and freeing at the end, 
>>> wouldn't it make more sense to have a bitfield that records what 
>>> is used by the format string inside the array elements?
>> How about (ab)using the value field?  Let interp_find_active() mark
>>  unneeded entries with NULL, and the rest with some cookie.  All
>> table entries with non-NULL values need to be initialized.
>> interp_set_entry() needs to be aware of this cookie, as it mustn't
>> free() it.  The cookie could be the address of a static char* in
>> interpolate.c.
> 
> Sorry, where is this aversion to making the struct a bit larger 
> coming from?

Not from the rational part of my brain, for sure.  The following on
top of Dscho's second patch?  (A char would be smaller, but a bitfield
documents the intent better.)


diff --git a/interpolate.c b/interpolate.c
index 80eeb36..1e4ccaf 100644
--- a/interpolate.c
+++ b/interpolate.c
@@ -103,22 +103,21 @@ unsigned long interpolate(char *result, unsigned long reslen,
 	return newlen;
 }
 
-char *interp_find_active(const char *orig,
-		const struct interp *interps, int ninterps)
+void interp_find_active(const char *orig, struct interp *interps, int ninterps)
 {
-	char *result = xcalloc(1, ninterps);
 	char c;
 	int i;
 
+	for (i = 0; i < ninterps; i++)
+		interps[i].active = 0;
+
 	while ((c = *(orig++)))
 		if (c == '%')
 			/* Try to match an interpolation string. */
 			for (i = 0; i < ninterps; i++)
 				if (!prefixcmp(orig, interps[i].name + 1)) {
-					result[i] = 1;
+					interps[i].active = 1;
 					orig += strlen(interps[i].name + 1);
 					break;
 				}
-
-	return result;
 }
diff --git a/interpolate.h b/interpolate.h
index 2d197c5..a8ee6b9 100644
--- a/interpolate.h
+++ b/interpolate.h
@@ -14,6 +14,7 @@
 struct interp {
 	const char *name;
 	char *value;
+	unsigned active:1;
 };
 
 extern void interp_set_entry(struct interp *table, int slot, const char *value);
@@ -22,7 +23,6 @@ extern void interp_clear_table(struct interp *table, int ninterps);
 extern unsigned long interpolate(char *result, unsigned long reslen,
 				 const char *orig,
 				 const struct interp *interps, int ninterps);
-extern char *interp_find_active(const char *orig,
-				const struct interp *interps, int ninterps);
+extern void interp_find_active(const char *orig, struct interp *interps, int ninterps);
 
 #endif /* INTERPOLATE_H */

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-06 22:31         ` René Scharfe
@ 2007-11-06 23:17           ` René Scharfe
  2007-11-06 23:45             ` Johannes Schindelin
                               ` (2 more replies)
  2007-11-06 23:36           ` Johannes Schindelin
  1 sibling, 3 replies; 24+ messages in thread
From: René Scharfe @ 2007-11-06 23:17 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Schindelin, git

By the way, the more intrusive surgery required when using strbuf_expand()
leads to even faster operation.  Here my measurements of most of Paul's
test cases (best of three runs):

# master
$ time git log --pretty=oneline >/dev/null

real    0m0.390s
user    0m0.340s
sys     0m0.040s

# master
$ time git log --pretty=raw >/dev/null

real    0m0.434s
user    0m0.408s
sys     0m0.016s

# master
$ time git log --pretty="format:%H {%P} %ct" >/dev/null

real    0m1.347s
user    0m0.080s
sys     0m1.256s

# interp_find_active
$ time ./git log --pretty="format:%H {%P} %ct" >/dev/null

real    0m0.694s
user    0m0.020s
sys     0m0.672s

# strbuf_expand
$ time ./git log --pretty="format:%H {%P} %ct" >/dev/null

real    0m0.395s
user    0m0.352s
sys     0m0.028s

I haven't seen any comments on strbuf_expand.  Is it too far out?
Here it is again, adjusted for current master and with the changes
to strbuf.[ch] coming first:


 strbuf.c |   22 +++++
 strbuf.h |    3 
 pretty.c |  276 ++++++++++++++++++++++++++++++++++-----------------------------
 3 files changed, 178 insertions(+), 123 deletions(-)

diff --git a/strbuf.c b/strbuf.c
index f4201e1..b71da99 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -129,6 +129,28 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
 	strbuf_setlen(sb, sb->len + len);
 }
 
+void strbuf_expand(struct strbuf *sb, const char *fmt,
+                   const char **placeholders, expand_fn_t fn, void *context)
+{
+	char c;
+	const char **p;
+
+	while ((c = *fmt++)) {
+		if (c != '%') {
+			strbuf_addch(sb, c);
+			continue;
+		}
+
+		for (p = placeholders; *p; p++) {
+			if (!prefixcmp(fmt, *p)) {
+				fn(sb, *p, context);
+				fmt += strlen(*p);
+				break;
+			}
+		}
+	}
+}
+
 size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
 {
 	size_t res;
diff --git a/strbuf.h b/strbuf.h
index cd7f295..95071d5 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -102,6 +102,9 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
 	strbuf_add(sb, sb2->buf, sb2->len);
 }
 
+typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *fmt, const char **placeholders, expand_fn_t fn, void *context);
+
 __attribute__((format(printf,2,3)))
 extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
 
diff --git a/pretty.c b/pretty.c
index 490cede..ea644bb 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1,6 +1,5 @@
 #include "cache.h"
 #include "commit.h"
-#include "interpolate.h"
 #include "utf8.h"
 #include "diff.h"
 #include "revision.h"
@@ -283,7 +282,8 @@ static char *logmsg_reencode(const struct commit *commit,
 	return out;
 }
 
-static void fill_person(struct interp *table, const char *msg, int len)
+static void format_person_part(struct strbuf *sb, char part,
+                               const char *msg, int len)
 {
 	int start, end, tz = 0;
 	unsigned long date;
@@ -295,7 +295,10 @@ static void fill_person(struct interp *table, const char *msg, int len)
 	start = end + 1;
 	while (end > 0 && isspace(msg[end - 1]))
 		end--;
-	table[0].value = xmemdupz(msg, end);
+	if (part == 'n') {	/* name */
+		strbuf_add(sb, msg, end);
+		return;
+	}
 
 	if (start >= len)
 		return;
@@ -307,7 +310,10 @@ static void fill_person(struct interp *table, const char *msg, int len)
 	if (end >= len)
 		return;
 
-	table[1].value = xmemdupz(msg + start, end - start);
+	if (part == 'e') {	/* email */
+		strbuf_add(sb, msg + start, end - start);
+		return;
+	}
 
 	/* parse date */
 	for (start = end + 1; start < len && isspace(msg[start]); start++)
@@ -318,7 +324,10 @@ static void fill_person(struct interp *table, const char *msg, int len)
 	if (msg + start == ep)
 		return;
 
-	table[5].value = xmemdupz(msg + start, ep - (msg + start));
+	if (part == 't') {	/* date, UNIX timestamp */
+		strbuf_add(sb, msg + start, ep - (msg + start));
+		return;
+	}
 
 	/* parse tz */
 	for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
@@ -329,123 +338,107 @@ static void fill_person(struct interp *table, const char *msg, int len)
 			tz = -tz;
 	}
 
-	interp_set_entry(table, 2, show_date(date, tz, DATE_NORMAL));
-	interp_set_entry(table, 3, show_date(date, tz, DATE_RFC2822));
-	interp_set_entry(table, 4, show_date(date, tz, DATE_RELATIVE));
-	interp_set_entry(table, 6, show_date(date, tz, DATE_ISO8601));
+	switch (part) {
+	case 'd':	/* date */
+		strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
+		return;
+	case 'D':	/* date, RFC2822 style */
+		strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
+		return;
+	case 'r':	/* date, relative */
+		strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
+		return;
+	case 'i':	/* date, ISO 8601 */
+		strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
+		return;
+	}
 }
 
-void format_commit_message(const struct commit *commit,
-                           const void *format, struct strbuf *sb)
+static void format_commit_item(struct strbuf *sb, const char *placeholder,
+                               void *context)
 {
-	struct interp table[] = {
-		{ "%H" },	/* commit hash */
-		{ "%h" },	/* abbreviated commit hash */
-		{ "%T" },	/* tree hash */
-		{ "%t" },	/* abbreviated tree hash */
-		{ "%P" },	/* parent hashes */
-		{ "%p" },	/* abbreviated parent hashes */
-		{ "%an" },	/* author name */
-		{ "%ae" },	/* author email */
-		{ "%ad" },	/* author date */
-		{ "%aD" },	/* author date, RFC2822 style */
-		{ "%ar" },	/* author date, relative */
-		{ "%at" },	/* author date, UNIX timestamp */
-		{ "%ai" },	/* author date, ISO 8601 */
-		{ "%cn" },	/* committer name */
-		{ "%ce" },	/* committer email */
-		{ "%cd" },	/* committer date */
-		{ "%cD" },	/* committer date, RFC2822 style */
-		{ "%cr" },	/* committer date, relative */
-		{ "%ct" },	/* committer date, UNIX timestamp */
-		{ "%ci" },	/* committer date, ISO 8601 */
-		{ "%e" },	/* encoding */
-		{ "%s" },	/* subject */
-		{ "%b" },	/* body */
-		{ "%Cred" },	/* red */
-		{ "%Cgreen" },	/* green */
-		{ "%Cblue" },	/* blue */
-		{ "%Creset" },	/* reset color */
-		{ "%n" },	/* newline */
-		{ "%m" },	/* left/right/bottom */
-	};
-	enum interp_index {
-		IHASH = 0, IHASH_ABBREV,
-		ITREE, ITREE_ABBREV,
-		IPARENTS, IPARENTS_ABBREV,
-		IAUTHOR_NAME, IAUTHOR_EMAIL,
-		IAUTHOR_DATE, IAUTHOR_DATE_RFC2822, IAUTHOR_DATE_RELATIVE,
-		IAUTHOR_TIMESTAMP, IAUTHOR_ISO8601,
-		ICOMMITTER_NAME, ICOMMITTER_EMAIL,
-		ICOMMITTER_DATE, ICOMMITTER_DATE_RFC2822,
-		ICOMMITTER_DATE_RELATIVE, ICOMMITTER_TIMESTAMP,
-		ICOMMITTER_ISO8601,
-		IENCODING,
-		ISUBJECT,
-		IBODY,
-		IRED, IGREEN, IBLUE, IRESET_COLOR,
-		INEWLINE,
-		ILEFT_RIGHT,
-	};
+	const struct commit *commit = context;
 	struct commit_list *p;
-	char parents[1024];
-	unsigned long len;
 	int i;
 	enum { HEADER, SUBJECT, BODY } state;
 	const char *msg = commit->buffer;
 
-	if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table))
-		die("invalid interp table!");
-
 	/* these are independent of the commit */
-	interp_set_entry(table, IRED, "\033[31m");
-	interp_set_entry(table, IGREEN, "\033[32m");
-	interp_set_entry(table, IBLUE, "\033[34m");
-	interp_set_entry(table, IRESET_COLOR, "\033[m");
-	interp_set_entry(table, INEWLINE, "\n");
+	switch (placeholder[0]) {
+	case 'C':
+		switch (placeholder[3]) {
+		case 'd':	/* red */
+			strbuf_addstr(sb, "\033[31m");
+			return;
+		case 'e':	/* green */
+			strbuf_addstr(sb, "\033[32m");
+			return;
+		case 'u':	/* blue */
+			strbuf_addstr(sb, "\033[34m");
+			return;
+		case 's':	/* reset color */
+			strbuf_addstr(sb, "\033[m");
+			return;
+		}
+	case 'n':		/* newline */
+		strbuf_addch(sb, '\n');
+		return;
+	}
 
 	/* these depend on the commit */
 	if (!commit->object.parsed)
 		parse_object(commit->object.sha1);
-	interp_set_entry(table, IHASH, sha1_to_hex(commit->object.sha1));
-	interp_set_entry(table, IHASH_ABBREV,
-			find_unique_abbrev(commit->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, ITREE, sha1_to_hex(commit->tree->object.sha1));
-	interp_set_entry(table, ITREE_ABBREV,
-			find_unique_abbrev(commit->tree->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, ILEFT_RIGHT,
-			 (commit->object.flags & BOUNDARY)
-			 ? "-"
-			 : (commit->object.flags & SYMMETRIC_LEFT)
-			 ? "<"
-			 : ">");
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			sha1_to_hex(p->item->object.sha1));
-	interp_set_entry(table, IPARENTS, parents + 1);
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			find_unique_abbrev(p->item->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
 
+	switch (placeholder[0]) {
+	case 'H':		/* commit hash */
+		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
+		return;
+	case 'h':		/* abbreviated commit hash */
+		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
+		                                     DEFAULT_ABBREV));
+		return;
+	case 'T':		/* tree hash */
+		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
+		return;
+	case 't':		/* abbreviated tree hash */
+		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
+		                                     DEFAULT_ABBREV));
+		return;
+	case 'P':		/* parent hashes */
+		for (p = commit->parents; p; p = p->next) {
+			if (p != commit->parents)
+				strbuf_addch(sb, ' ');
+			strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
+		}
+		return;
+	case 'p':		/* abbreviated parent hashes */
+		for (p = commit->parents; p; p = p->next) {
+			if (p != commit->parents)
+				strbuf_addch(sb, ' ');
+			strbuf_addstr(sb, find_unique_abbrev(
+					p->item->object.sha1, DEFAULT_ABBREV));
+		}
+		return;
+	case 'm':		/* left/right/bottom */
+		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
+		                 ? '-'
+		                 : (commit->object.flags & SYMMETRIC_LEFT)
+		                 ? '<'
+		                 : '>');
+		return;
+	}
+
+	/* For the rest we have to parse the commit header. */
 	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
 		int eol;
 		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
 			; /* do nothing */
 
 		if (state == SUBJECT) {
-			table[ISUBJECT].value = xmemdupz(msg + i, eol - i);
+			if (placeholder[0] == 's') {
+				strbuf_add(sb, msg + i, eol - i);
+				return;
+			}
 			i = eol;
 		}
 		if (i == eol) {
@@ -453,29 +446,66 @@ void format_commit_message(const struct commit *commit,
 			/* strip empty lines */
 			while (msg[eol + 1] == '\n')
 				eol++;
-		} else if (!prefixcmp(msg + i, "author "))
-			fill_person(table + IAUTHOR_NAME,
-					msg + i + 7, eol - i - 7);
-		else if (!prefixcmp(msg + i, "committer "))
-			fill_person(table + ICOMMITTER_NAME,
-					msg + i + 10, eol - i - 10);
-		else if (!prefixcmp(msg + i, "encoding "))
-			table[IENCODING].value =
-				xmemdupz(msg + i + 9, eol - i - 9);
+		} else if (!prefixcmp(msg + i, "author ")) {
+			if (placeholder[0] == 'a') {
+				format_person_part(sb, placeholder[1],
+				                   msg + i + 7, eol - i - 7);
+				return;
+			}
+		} else if (!prefixcmp(msg + i, "committer ")) {
+			if (placeholder[0] == 'c') {
+				format_person_part(sb, placeholder[1],
+				                   msg + i + 10, eol - i - 10);
+				return;
+			}
+		} else if (!prefixcmp(msg + i, "encoding ")) {
+			if (placeholder[0] == 'e') {
+				strbuf_add(sb, msg + i + 9, eol - i - 9);
+				return;
+			}
+		}
 		i = eol;
 	}
-	if (msg[i])
-		table[IBODY].value = xstrdup(msg + i);
-
-	len = interpolate(sb->buf + sb->len, strbuf_avail(sb),
-				format, table, ARRAY_SIZE(table));
-	if (len > strbuf_avail(sb)) {
-		strbuf_grow(sb, len);
-		interpolate(sb->buf + sb->len, strbuf_avail(sb) + 1,
-					format, table, ARRAY_SIZE(table));
-	}
-	strbuf_setlen(sb, sb->len + len);
-	interp_clear_table(table, ARRAY_SIZE(table));
+	if (msg[i] && placeholder[0] == 'b')	/* body */
+		strbuf_addstr(sb, msg + i);
+}
+
+void format_commit_message(const struct commit *commit,
+                           const void *format, struct strbuf *sb)
+{
+	const char *placeholders[] = {
+		"H",		/* commit hash */
+		"h",		/* abbreviated commit hash */
+		"T",		/* tree hash */
+		"t",		/* abbreviated tree hash */
+		"P",		/* parent hashes */
+		"p",		/* abbreviated parent hashes */
+		"an",		/* author name */
+		"ae",		/* author email */
+		"ad",		/* author date */
+		"aD",		/* author date, RFC2822 style */
+		"ar",		/* author date, relative */
+		"at",		/* author date, UNIX timestamp */
+		"ai",		/* author date, ISO 8601 */
+		"cn",		/* committer name */
+		"ce",		/* committer email */
+		"cd",		/* committer date */
+		"cD",		/* committer date, RFC2822 style */
+		"cr",		/* committer date, relative */
+		"ct",		/* committer date, UNIX timestamp */
+		"ci",		/* committer date, ISO 8601 */
+		"e",		/* encoding */
+		"s",		/* subject */
+		"b",		/* body */
+		"Cred",		/* red */
+		"Cgreen",	/* green */
+		"Cblue",	/* blue */
+		"Creset",	/* reset color */
+		"n",		/* newline */
+		"m",		/* left/right/bottom */
+		NULL
+	};
+	strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit);
 }
 
 static void pp_header(enum cmit_fmt fmt,

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-06 22:31         ` René Scharfe
  2007-11-06 23:17           ` René Scharfe
@ 2007-11-06 23:36           ` Johannes Schindelin
  2007-11-06 23:38             ` [PATCH 1/2] interpolate.[ch]: Add a function to find which interpolations are active Johannes Schindelin
  2007-11-06 23:38             ` [PATCH 2/2] pretty=format: Avoid some expensive calculations when not needed Johannes Schindelin
  1 sibling, 2 replies; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-06 23:36 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, git

[-- Attachment #1: Type: TEXT/PLAIN, Size: 482 bytes --]

Hi,

On Tue, 6 Nov 2007, René Scharfe wrote:

> -char *interp_find_active(const char *orig,
> -		const struct interp *interps, int ninterps)
> +void interp_find_active(const char *orig, struct interp *interps, int ninterps)
>  {
> -	char *result = xcalloc(1, ninterps);
>  	char c;
>  	int i;
>  
> +	for (i = 0; i < ninterps; i++)
> +		interps[i].active = 0;
> +
> [...]

Funny.

I have the _exact_ same change in my repository.  I only forgot to send 
it, it seems.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH 1/2] interpolate.[ch]: Add a function to find which interpolations are active.
  2007-11-06 23:36           ` Johannes Schindelin
@ 2007-11-06 23:38             ` Johannes Schindelin
  2007-11-06 23:38             ` [PATCH 2/2] pretty=format: Avoid some expensive calculations when not needed Johannes Schindelin
  1 sibling, 0 replies; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-06 23:38 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, git


Some substitutions require pretty expensive operations.  So it makes
sense to find out which are needed to begin with.  Call
interp_find_active() to set the new "active" field in the interpolation
table.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---

	This is the patch that Rene proposed, but squashed into my earlier
	patch.

 interpolate.c |   20 ++++++++++++++++++++
 interpolate.h |    3 +++
 2 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/interpolate.c b/interpolate.c
index 6ef53f2..bbd89bb 100644
--- a/interpolate.c
+++ b/interpolate.c
@@ -102,3 +102,23 @@ unsigned long interpolate(char *result, unsigned long reslen,
 		*dest = '\0';
 	return newlen;
 }
+
+void interp_find_active(const char *orig,
+		struct interp *interps, int ninterps)
+{
+	char c;
+	int i;
+
+	for (i = 0; i < ninterps; i++)
+		interps[i].active = 0;
+
+	while ((c = *(orig++)))
+		if (c == '%')
+			/* Try to match an interpolation string. */
+			for (i = 0; i < ninterps; i++)
+				if (!prefixcmp(orig, interps[i].name + 1)) {
+					interps[i].active = 1;
+					orig += strlen(interps[i].name + 1);
+					break;
+				}
+}
diff --git a/interpolate.h b/interpolate.h
index 77407e6..d23531a 100644
--- a/interpolate.h
+++ b/interpolate.h
@@ -14,6 +14,7 @@
 struct interp {
 	const char *name;
 	char *value;
+	int active:1;
 };
 
 extern void interp_set_entry(struct interp *table, int slot, const char *value);
@@ -22,5 +23,7 @@ extern void interp_clear_table(struct interp *table, int ninterps);
 extern unsigned long interpolate(char *result, unsigned long reslen,
 				 const char *orig,
 				 const struct interp *interps, int ninterps);
+extern void interp_find_active(const char *orig,
+				struct interp *interps, int ninterps);
 
 #endif /* INTERPOLATE_H */
-- 
1.5.3.5.1597.g7191

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH 2/2] pretty=format: Avoid some expensive calculations when not needed
  2007-11-06 23:36           ` Johannes Schindelin
  2007-11-06 23:38             ` [PATCH 1/2] interpolate.[ch]: Add a function to find which interpolations are active Johannes Schindelin
@ 2007-11-06 23:38             ` Johannes Schindelin
  1 sibling, 0 replies; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-06 23:38 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, git


Use the new function interp_find_active() to avoid calculating the
unique hash names, and other things, when they are not even asked for.

Unfortunately, we cannot reuse the result of that function, which
would be cleaner: there are more users than just git log.  Most
notably, git-archive with "$Format:...$" substitution.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
---
 pretty.c |   55 ++++++++++++++++++++++++++++++++++---------------------
 1 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/pretty.c b/pretty.c
index 490cede..590de4c 100644
--- a/pretty.c
+++ b/pretty.c
@@ -394,6 +394,8 @@ void format_commit_message(const struct commit *commit,
 	enum { HEADER, SUBJECT, BODY } state;
 	const char *msg = commit->buffer;
 
+	interp_find_active(format, table, ARRAY_SIZE(table));
+
 	if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table))
 		die("invalid interp table!");
 
@@ -407,12 +409,18 @@ void format_commit_message(const struct commit *commit,
 	/* these depend on the commit */
 	if (!commit->object.parsed)
 		parse_object(commit->object.sha1);
-	interp_set_entry(table, IHASH, sha1_to_hex(commit->object.sha1));
-	interp_set_entry(table, IHASH_ABBREV,
+	if (table[IHASH].active)
+		interp_set_entry(table, IHASH,
+				sha1_to_hex(commit->object.sha1));
+	if (table[IHASH_ABBREV].active)
+		interp_set_entry(table, IHASH_ABBREV,
 			find_unique_abbrev(commit->object.sha1,
 				DEFAULT_ABBREV));
-	interp_set_entry(table, ITREE, sha1_to_hex(commit->tree->object.sha1));
-	interp_set_entry(table, ITREE_ABBREV,
+	if (table[ITREE].active)
+		interp_set_entry(table, ITREE,
+				sha1_to_hex(commit->tree->object.sha1));
+	if (table[ITREE_ABBREV].active)
+		interp_set_entry(table, ITREE_ABBREV,
 			find_unique_abbrev(commit->tree->object.sha1,
 				DEFAULT_ABBREV));
 	interp_set_entry(table, ILEFT_RIGHT,
@@ -422,22 +430,27 @@ void format_commit_message(const struct commit *commit,
 			 ? "<"
 			 : ">");
 
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			sha1_to_hex(p->item->object.sha1));
-	interp_set_entry(table, IPARENTS, parents + 1);
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			find_unique_abbrev(p->item->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
+	if (table[IPARENTS].active) {
+		parents[1] = 0;
+		for (i = 0, p = commit->parents;
+				p && i < sizeof(parents) - 1;
+				p = p->next)
+			i += snprintf(parents + i, sizeof(parents) - i - 1,
+				" %s", sha1_to_hex(p->item->object.sha1));
+		interp_set_entry(table, IPARENTS, parents + 1);
+	}
+
+	if (table[IPARENTS_ABBREV].active) {
+		parents[1] = 0;
+		for (i = 0, p = commit->parents;
+				p && i < sizeof(parents) - 1;
+				p = p->next)
+			i += snprintf(parents + i, sizeof(parents) - i - 1,
+				" %s",
+				find_unique_abbrev(p->item->object.sha1,
+					DEFAULT_ABBREV));
+		interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
+	}
 
 	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
 		int eol;
@@ -464,7 +477,7 @@ void format_commit_message(const struct commit *commit,
 				xmemdupz(msg + i + 9, eol - i - 9);
 		i = eol;
 	}
-	if (msg[i])
+	if (table[IBODY].active && msg[i])
 		table[IBODY].value = xstrdup(msg + i);
 
 	len = interpolate(sb->buf + sb->len, strbuf_avail(sb),
-- 
1.5.3.5.1597.g7191

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-06 23:17           ` René Scharfe
@ 2007-11-06 23:45             ` Johannes Schindelin
  2007-11-07 23:19               ` René Scharfe
  2007-11-07  0:11             ` Pierre Habouzit
  2007-11-07 20:43             ` Junio C Hamano
  2 siblings, 1 reply; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-06 23:45 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, git

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1062 bytes --]

Hi,

On Wed, 7 Nov 2007, René Scharfe wrote:

> By the way, the more intrusive surgery required when using strbuf_expand()
> leads to even faster operation.  Here my measurements of most of Paul's
> test cases (best of three runs):
>
> [...]

impressive timings.  Although I wonder where the time comes from, as the 
other substitutions should not be _that_ expensive.

In any case, your approach seems much more sensible, now that we have 
strbuf.

> diff --git a/strbuf.h b/strbuf.h
> index cd7f295..95071d5 100644
> --- a/strbuf.h
> +++ b/strbuf.h
> @@ -102,6 +102,9 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
>  	strbuf_add(sb, sb2->buf, sb2->len);
>  }
>  
> +typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
> +extern void strbuf_expand(struct strbuf *sb, const char *fmt, const char **placeholders, expand_fn_t fn, void *context);

I wonder if it would even faster (but maybe not half as readable) if 
expand_fd_t got the placeholder_index instead of the placeholder.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations  when not needed
  2007-11-06 23:17           ` René Scharfe
  2007-11-06 23:45             ` Johannes Schindelin
@ 2007-11-07  0:11             ` Pierre Habouzit
  2007-11-07  0:14               ` Pierre Habouzit
  2007-11-07 20:43             ` Junio C Hamano
  2 siblings, 1 reply; 24+ messages in thread
From: Pierre Habouzit @ 2007-11-07  0:11 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, Johannes Schindelin, git

[-- Attachment #1: Type: text/plain, Size: 1697 bytes --]

On Tue, Nov 06, 2007 at 11:17:14PM +0000, René Scharfe wrote:
> I haven't seen any comments on strbuf_expand.  Is it too far out?
> Here it is again, adjusted for current master and with the changes
> to strbuf.[ch] coming first:

  I have one.

>  strbuf.c |   22 +++++
>  strbuf.h |    3 
>  pretty.c |  276 ++++++++++++++++++++++++++++++++++-----------------------------
>  3 files changed, 178 insertions(+), 123 deletions(-)
> 
> diff --git a/strbuf.c b/strbuf.c
> index f4201e1..b71da99 100644
> --- a/strbuf.c
> +++ b/strbuf.c
> @@ -129,6 +129,28 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
>  	strbuf_setlen(sb, sb->len + len);
>  }
>  
> +void strbuf_expand(struct strbuf *sb, const char *fmt,
> +                   const char **placeholders, expand_fn_t fn, void *context)
> +{
> +	char c;
> +	const char **p;
> +
> +	while ((c = *fmt++)) {
> +		if (c != '%') {
> +			strbuf_addch(sb, c);
> +			continue;
> +		}

strbuf_addch is pretty inneficient as it puts NULs each time. rather
do that (sketchy) :

{
    for (;;) {
        const char *percent = strchr(fmt, '%');
        if (!percent)
            break;
        strbuf_add(sb, fmt, percent - fmt);
        fmt = percent + 1;

        /* do your stuff */
    }
    strbuf_addstr(sb, fmt);
}

Of course it's a detail as formats will probably be short. But it's a good
example to show to people wanting to write new strbuf functions.

This nitpicking apart, the timings are impressive.

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations  when not needed
  2007-11-07  0:11             ` Pierre Habouzit
@ 2007-11-07  0:14               ` Pierre Habouzit
  2007-11-07 23:21                 ` René Scharfe
  0 siblings, 1 reply; 24+ messages in thread
From: Pierre Habouzit @ 2007-11-07  0:14 UTC (permalink / raw)
  To: René Scharfe, Junio C Hamano, Johannes Schindelin, git

[-- Attachment #1: Type: text/plain, Size: 2070 bytes --]

On Wed, Nov 07, 2007 at 12:11:12AM +0000, Pierre Habouzit wrote:
> On Tue, Nov 06, 2007 at 11:17:14PM +0000, René Scharfe wrote:
> > I haven't seen any comments on strbuf_expand.  Is it too far out?
> > Here it is again, adjusted for current master and with the changes
> > to strbuf.[ch] coming first:
> 
>   I have one.
> 
> >  strbuf.c |   22 +++++
> >  strbuf.h |    3 
> >  pretty.c |  276 ++++++++++++++++++++++++++++++++++-----------------------------
> >  3 files changed, 178 insertions(+), 123 deletions(-)
> > 
> > diff --git a/strbuf.c b/strbuf.c
> > index f4201e1..b71da99 100644
> > --- a/strbuf.c
> > +++ b/strbuf.c
> > @@ -129,6 +129,28 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
> >  	strbuf_setlen(sb, sb->len + len);
> >  }
> >  
> > +void strbuf_expand(struct strbuf *sb, const char *fmt,
> > +                   const char **placeholders, expand_fn_t fn, void *context)
> > +{
> > +	char c;
> > +	const char **p;
> > +
> > +	while ((c = *fmt++)) {
> > +		if (c != '%') {
> > +			strbuf_addch(sb, c);
> > +			continue;
> > +		}
> 
> strbuf_addch is pretty inneficient as it puts NULs each time. rather
> do that (sketchy) :
> 
> {
>     for (;;) {
>         const char *percent = strchr(fmt, '%');
>         if (!percent)
>             break;
>         strbuf_add(sb, fmt, percent - fmt);
>         fmt = percent + 1;
> 
>         /* do your stuff */
>     }
>     strbuf_addstr(sb, fmt);
> }


Or if we are at this level of micro-optimization:

{
    const char *percent = strchrnul(fmt, '%');
    while (*percent) {
        strbuf_add(sb, fmt, percent - fmt);
        fmt = percent + 1;

        /* do your stuff */

        percent = strchrnul(fmt, '%');
    }
    strbuf_add(sb, fmt, percent - fmt);
}


Which would require strchrnul, but it's trivial compat/ material for sure.


-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-06 23:17           ` René Scharfe
  2007-11-06 23:45             ` Johannes Schindelin
  2007-11-07  0:11             ` Pierre Habouzit
@ 2007-11-07 20:43             ` Junio C Hamano
  2007-11-09  0:49               ` René Scharfe
  2 siblings, 1 reply; 24+ messages in thread
From: Junio C Hamano @ 2007-11-07 20:43 UTC (permalink / raw)
  To: René Scharfe; +Cc: Johannes Schindelin, git

René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> I haven't seen any comments on strbuf_expand.  Is it too far out?
> Here it is again, adjusted for current master and with the changes
> to strbuf.[ch] coming first:

Numbers talk ;-).

In your previous round, you alluded that the strbuf_expand()
interface could allow caching of the return value of fn(), but I
do not think strbuf_expand() in this patch has anything to
directly support that notion.

Nor I would expect to --- fn() could keep the really expensive
information cached, keyed with context value, if it wanted to,
but in practice for the purpose of format_commit_item() I do not
offhand see anything cacheable and reusable, unless the user did
stupid things (e.g. use more than one %h in the format string).

I added a few paragraphs to describe the API in the commit log
message, and rewrote "# master" to "(master)" etc.

-- >8 --
pretty=format: Avoid some expensive calculations when not needed

This rewrites the custom format prettyprinter for commit objects
by introducing a new strbuf API function "strbuf_expand()".

The function takes a format string, list of placeholder strings,
a user supplied function 'fn', and an opaque pointer 'context'
to tell 'fn' what thingy to operate on.

The function 'fn' is expected to accept a strbuf, a parsed
placeholder string and the 'context' pointer, and append the
interpolated value for the 'context' thingy, according to the
format specified by the placeholder.

Here my measurements of most of Paul's test cases (best of
three runs):

(master)
$ time git log --pretty=oneline >/dev/null

...

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-06 23:45             ` Johannes Schindelin
@ 2007-11-07 23:19               ` René Scharfe
  2007-11-08  0:14                 ` Johannes Schindelin
  0 siblings, 1 reply; 24+ messages in thread
From: René Scharfe @ 2007-11-07 23:19 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Junio C Hamano, git

Johannes Schindelin schrieb:
> Hi,
> 
> On Wed, 7 Nov 2007, René Scharfe wrote:
> 
>> By the way, the more intrusive surgery required when using strbuf_expand()
>> leads to even faster operation.  Here my measurements of most of Paul's
>> test cases (best of three runs):
>>
>> [...]
> 
> impressive timings.  Although I wonder where the time comes from, as the 
> other substitutions should not be _that_ expensive.

I haven't run a profiler, but my two suspects are the malloc()s and
free()s done by interp_set_entry(), and the fact that
format_commit_message() calls interpolate() twice.

> In any case, your approach seems much more sensible, now that we have 
> strbuf.
> 
>> diff --git a/strbuf.h b/strbuf.h
>> index cd7f295..95071d5 100644
>> --- a/strbuf.h
>> +++ b/strbuf.h
>> @@ -102,6 +102,9 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
>>  	strbuf_add(sb, sb2->buf, sb2->len);
>>  }
>>  
>> +typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
>> +extern void strbuf_expand(struct strbuf *sb, const char *fmt, const char **placeholders, expand_fn_t fn, void *context);
> 
> I wonder if it would even faster (but maybe not half as readable) if 
> expand_fd_t got the placeholder_index instead of the placeholder.

I doubt it.  All this would save is one pointer dereference per
placeholder.  I haven't tried and measured this, though.

René

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-07  0:14               ` Pierre Habouzit
@ 2007-11-07 23:21                 ` René Scharfe
  2007-11-07 23:31                   ` Pierre Habouzit
  0 siblings, 1 reply; 24+ messages in thread
From: René Scharfe @ 2007-11-07 23:21 UTC (permalink / raw)
  To: Pierre Habouzit, Junio C Hamano, Johannes Schindelin, git

Pierre Habouzit schrieb:
> {
>     const char *percent = strchrnul(fmt, '%');
>     while (*percent) {
>         strbuf_add(sb, fmt, percent - fmt);
>         fmt = percent + 1;
> 
>         /* do your stuff */
> 
>         percent = strchrnul(fmt, '%');
>     }
>     strbuf_add(sb, fmt, percent - fmt);
> }
> 
> 
> Which would require strchrnul, but it's trivial compat/ material for sure.

Grepping through the source I see several places that can be simplified
by converting them to strchrnul(), so I think introducing this GNU
extension is a good idea in any case.

Using strchr()/strchrnul() instead of strbuf_addch()'ing is sensible, of
course.  I don't like the duplicate code in your sketch above, though.
I'll try to look into it later today.

Thanks!
René

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations   when not needed
  2007-11-07 23:21                 ` René Scharfe
@ 2007-11-07 23:31                   ` Pierre Habouzit
  0 siblings, 0 replies; 24+ messages in thread
From: Pierre Habouzit @ 2007-11-07 23:31 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, Johannes Schindelin, git

[-- Attachment #1: Type: text/plain, Size: 1393 bytes --]

On Wed, Nov 07, 2007 at 11:21:30PM +0000, René Scharfe wrote:
> Pierre Habouzit schrieb:
> > {
> >     const char *percent = strchrnul(fmt, '%');
> >     while (*percent) {
> >         strbuf_add(sb, fmt, percent - fmt);
> >         fmt = percent + 1;
> > 
> >         /* do your stuff */
> > 
> >         percent = strchrnul(fmt, '%');
> >     }
> >     strbuf_add(sb, fmt, percent - fmt);
> > }
> > 
> > 
> > Which would require strchrnul, but it's trivial compat/ material for sure.
> 
> Grepping through the source I see several places that can be simplified
> by converting them to strchrnul(), so I think introducing this GNU
> extension is a good idea in any case.

  Yes, strchrnul is _very_ useful for parsing.

> Using strchr()/strchrnul() instead of strbuf_addch()'ing is sensible, of
> course.  I don't like the duplicate code in your sketch above, though.
> I'll try to look into it later today.

Well you can alternatively write it this way of course:

for (;;) {
    const char *percent = strchrnul(fmt, '%');
    strbuf_add(sb, fmt, percent - fmt);
    if (!*percent)
        break /* or return probably */;
    fmt = percent + 1;

    /* do your stuff */
}

-- 
·O·  Pierre Habouzit
··O                                                madcoder@debian.org
OOO                                                http://www.madism.org

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-07 23:19               ` René Scharfe
@ 2007-11-08  0:14                 ` Johannes Schindelin
  0 siblings, 0 replies; 24+ messages in thread
From: Johannes Schindelin @ 2007-11-08  0:14 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, git

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1690 bytes --]

Hi,

On Thu, 8 Nov 2007, René Scharfe wrote:

> Johannes Schindelin schrieb:
> 
> > On Wed, 7 Nov 2007, René Scharfe wrote:
> > 
> >> By the way, the more intrusive surgery required when using 
> >> strbuf_expand() leads to even faster operation.  Here my measurements 
> >> of most of Paul's test cases (best of three runs):
> >>
> >> [...]
> > 
> > impressive timings.  Although I wonder where the time comes from, as 
> > the other substitutions should not be _that_ expensive.
> 
> I haven't run a profiler, but my two suspects are the malloc()s and 
> free()s done by interp_set_entry(), and the fact that 
> format_commit_message() calls interpolate() twice.

But of course! *slapsthehead*

> > In any case, your approach seems much more sensible, now that we have 
> > strbuf.
> > 
> >> diff --git a/strbuf.h b/strbuf.h
> >> index cd7f295..95071d5 100644
> >> --- a/strbuf.h
> >> +++ b/strbuf.h
> >> @@ -102,6 +102,9 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
> >>  	strbuf_add(sb, sb2->buf, sb2->len);
> >>  }
> >>  
> >> +typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
> >> +extern void strbuf_expand(struct strbuf *sb, const char *fmt, const char **placeholders, expand_fn_t fn, void *context);
> > 
> > I wonder if it would even faster (but maybe not half as readable) if 
> > expand_fd_t got the placeholder_index instead of the placeholder.
> 
> I doubt it.  All this would save is one pointer dereference per
> placeholder.  I haven't tried and measured this, though.

My thinko.  I thought that you save one prefixcmp(), but strbuf_expand() 
offloads that to the callback function.

Thanks,
Dscho

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed
  2007-11-07 20:43             ` Junio C Hamano
@ 2007-11-09  0:49               ` René Scharfe
  0 siblings, 0 replies; 24+ messages in thread
From: René Scharfe @ 2007-11-09  0:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Schindelin, git

Junio C Hamano schrieb:
> René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:
> 
>> I haven't seen any comments on strbuf_expand.  Is it too far out?
>> Here it is again, adjusted for current master and with the changes
>> to strbuf.[ch] coming first:
> 
> Numbers talk ;-).

:-)  I just hope someone else has compared the times, too..

> In your previous round, you alluded that the strbuf_expand()
> interface could allow caching of the return value of fn(), but I
> do not think strbuf_expand() in this patch has anything to
> directly support that notion.
> 
> Nor I would expect to --- fn() could keep the really expensive
> information cached, keyed with context value, if it wanted to,
> but in practice for the purpose of format_commit_item() I do not
> offhand see anything cacheable and reusable, unless the user did
> stupid things (e.g. use more than one %h in the format string).

Yes, that's what I arrived at, too, when I actually wrote and used
the function.

> I added a few paragraphs to describe the API in the commit log
> message, and rewrote "# master" to "(master)" etc.

Thanks!  I'll send a slightly enhanced version in a two-patch
series -- with an actual commit message, including your API
desciption -- in just a few seconds.

René

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2007-11-09  1:01 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-04 19:14 [PATCH 0/3] Make user formatted commit listing less expensive Johannes Schindelin
2007-11-04 19:15 ` [PATCH 1/3] Split off the pretty print stuff into its own file Johannes Schindelin
2007-11-05 21:16   ` Junio C Hamano
2007-11-04 19:15 ` [PATCH 2/3] interpolate.[ch]: Add a function to find which interpolations are active Johannes Schindelin
2007-11-04 19:15 ` [PATCH 3/3] pretty=format: Avoid some expensive calculations when not needed Johannes Schindelin
2007-11-05 19:51   ` Junio C Hamano
2007-11-05 20:21     ` René Scharfe
2007-11-05 20:25       ` Jon Loeliger
2007-11-05 23:53       ` Johannes Schindelin
2007-11-06  1:06       ` Junio C Hamano
2007-11-06 22:31         ` René Scharfe
2007-11-06 23:17           ` René Scharfe
2007-11-06 23:45             ` Johannes Schindelin
2007-11-07 23:19               ` René Scharfe
2007-11-08  0:14                 ` Johannes Schindelin
2007-11-07  0:11             ` Pierre Habouzit
2007-11-07  0:14               ` Pierre Habouzit
2007-11-07 23:21                 ` René Scharfe
2007-11-07 23:31                   ` Pierre Habouzit
2007-11-07 20:43             ` Junio C Hamano
2007-11-09  0:49               ` René Scharfe
2007-11-06 23:36           ` Johannes Schindelin
2007-11-06 23:38             ` [PATCH 1/2] interpolate.[ch]: Add a function to find which interpolations are active Johannes Schindelin
2007-11-06 23:38             ` [PATCH 2/2] pretty=format: Avoid some expensive calculations when not needed Johannes Schindelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).