[PATCH] Avoid a useless prefix lookup in strbuf_expand()

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] Avoid a useless prefix lookup in strbuf_expand()
@ 2008-01-06  0:10 Marco Costalba
  0 siblings, 0 replies; 6+ messages in thread
From: Marco Costalba @ 2008-01-06  0:10 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List, René Scharfe

[-- Attachment #1: Type: text/plain, Size: 12231 bytes --]

Currently the --pretty=format prefix is looked up in a
tight loop in strbuf_expand(); if found, it is passed as a parameter
to format_commit_item(), which does another search using a
switch statement to select the proper operation according to
the kind of prefix.

Because the switch statement is already able to discard unknown
matches, we don't need the prefix lookup before calling format_commit_item().

This patch removes a useless loop in a very fast path,
used, for example, by 'git log' with the --pretty=format option.

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
---


To apply on top of "[PATCH] Fix an off by one bug in pretty.c"


I am also sending it as an attached file because I think my mailer will
word-wrap this one.


pretty.c |  228 +++++++++++++++++++++++++++++--------------------------------
 strbuf.c |   19 ++---
 strbuf.h |    4 +-
 3 files changed, 118 insertions(+), 133 deletions(-)

diff --git a/pretty.c b/pretty.c
index 3ce5e6f..5132b1f 100644
--- a/pretty.c
+++ b/pretty.c
@@ -282,76 +282,93 @@ static char *logmsg_reencode(const struct
 	return out;
 }

-static void format_person_part(struct strbuf *sb, char part,
-                               const char *msg, int len)
+static int parse_tz(char *ep, const char *msg, int len) {
+
+	int tz = 0, start = ep - msg + 1;
+
+	for ( ;start < len && isspace(msg[start]); start++)
+		; /* do nothing */
+
+	if (start + 1 < len) {
+		tz = strtoul(msg + start + 1, NULL, 10);
+		if (msg[start] == '-')
+			tz = -tz;
+	}
+	return tz;
+}
+
+static size_t format_person_part(struct strbuf *sb, char part,
+                                 const char *msg, int len)
 {
-	int start, end, tz = 0;
-	unsigned long date;
+	int start, end, tz = 0, date_valid;
+	unsigned long date = 0;
 	char *ep;

-	/* parse name */
+	/* advance 'end' to point to name end delimiter */
 	for (end = 0; end < len && msg[end] != '<'; end++)
 		; /* do nothing */
-	start = end + 1;
-	while (end > 0 && isspace(msg[end - 1]))
-		end--;
-	if (part == 'n') {	/* name */
+
+	if (part == 'n') { /* name */
+		while (end > 0 && isspace(msg[end - 1]))
+			end--;
+
 		strbuf_add(sb, msg, end);
-		return;
+		return 2;
 	}
+	start = ++end; /* save email start delimiter */

-	if (start >= len)
-		return;
-
-	/* parse email */
-	for (end = start; end < len && msg[end] != '>'; end++)
+	/* advance 'end' to point to email end delimiter */
+	for ( ; end < len && msg[end] != '>'; end++)
 		; /* do nothing */

-	if (end >= len)
-		return;
-
-	if (part == 'e') {	/* email */
-		strbuf_add(sb, msg + start, end - start);
-		return;
+	if (part == 'e') { /* email */
+		if (end - start > 0)
+			strbuf_add(sb, msg + start, end - start);
+		return 2;
 	}

-	/* parse date */
+	/* advance 'start' to point to date start delimiter */
 	for (start = end + 1; start < len && isspace(msg[start]); start++)
 		; /* do nothing */
-	if (start >= len)
-		return;
-	date = strtoul(msg + start, &ep, 10);
-	if (msg + start == ep)
-		return;

-	if (part == 't') {	/* date, UNIX timestamp */
-		strbuf_add(sb, msg + start, ep - (msg + start));
-		return;
-	}
+	date_valid = start < len;

-	/* parse tz */
-	for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
-		; /* do nothing */
-	if (start + 1 < len) {
-		tz = strtoul(msg + start + 1, NULL, 10);
-		if (msg[start] == '-')
-			tz = -tz;
+	if (date_valid)
+		date = strtoul(msg + start, &ep, 10);
+
+	if (part == 't') { /* date, UNIX timestamp */
+		if (date_valid && msg + start != ep)
+			strbuf_add(sb, msg + start, ep - (msg + start));
+		return 2;
 	}

 	switch (part) {
-	case 'd':	/* date */
-		strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
-		return;
-	case 'D':	/* date, RFC2822 style */
-		strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
-		return;
-	case 'r':	/* date, relative */
-		strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
-		return;
-	case 'i':	/* date, ISO 8601 */
-		strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
-		return;
+	case 'd': /* date */
+		if (date_valid) {
+			tz = parse_tz(ep, msg, len);
+			strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
+		}
+		return 2;
+	case 'D': /* date, RFC2822 style */
+		if (date_valid) {
+			tz = parse_tz(ep, msg, len);
+			strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
+		}
+		return 2;
+	case 'r': /* date, relative */
+		if (date_valid) {
+			tz = parse_tz(ep, msg, len);
+			strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
+		}
+		return 2;
+	case 'i': /* date, ISO 8601 */
+		if (date_valid) {
+			tz = parse_tz(ep, msg, len);
+			strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
+		}
+		return 2;
 	}
+	return 0; /* unknown person part */
 }

 struct chunk {
@@ -432,8 +449,8 @@ static void parse_commit_header(struct format_
 	context->commit_header_parsed = 1;
 }

-static void format_commit_item(struct strbuf *sb, const char *placeholder,
-                               void *context)
+static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
+                                 void *context)
 {
 	struct format_commit_context *c = context;
 	const struct commit *commit = c->commit;
@@ -443,23 +460,23 @@ static void format_commit_item(struct strbuf *sb,
 	/* these are independent of the commit */
 	switch (placeholder[0]) {
 	case 'C':
-		switch (placeholder[3]) {
-		case 'd':	/* red */
+		if (!prefixcmp(placeholder + 1, "red")) {
 			strbuf_addstr(sb, "\033[31m");
-			return;
-		case 'e':	/* green */
+			return 4;
+		} else if (!prefixcmp(placeholder + 1, "green")) {
 			strbuf_addstr(sb, "\033[32m");
-			return;
-		case 'u':	/* blue */
+			return 6;
+		} else if (!prefixcmp(placeholder + 1, "blue")) {
 			strbuf_addstr(sb, "\033[34m");
-			return;
-		case 's':	/* reset color */
+			return 5;
+		} else if (!prefixcmp(placeholder + 1, "reset")) {
 			strbuf_addstr(sb, "\033[m");
-			return;
-		}
+			return 6;
+		} else
+			return 0;
 	case 'n':		/* newline */
 		strbuf_addch(sb, '\n');
-		return;
+		return 1;
 	}

 	/* these depend on the commit */
@@ -469,34 +486,34 @@ static void format_commit_item(struct strbuf *sb,
 	switch (placeholder[0]) {
 	case 'H':		/* commit hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
-		return;
+		return 1;
 	case 'h':		/* abbreviated commit hash */
 		if (add_again(sb, &c->abbrev_commit_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
-		return;
+		return 1;
 	case 'T':		/* tree hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
-		return;
+		return 1;
 	case 't':		/* abbreviated tree hash */
 		if (add_again(sb, &c->abbrev_tree_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
-		return;
+		return 1;
 	case 'P':		/* parent hashes */
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
 			strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
 		}
-		return;
+		return 1;
 	case 'p':		/* abbreviated parent hashes */
 		if (add_again(sb, &c->abbrev_parent_hashes))
-			return;
+			return 1;
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
@@ -505,14 +522,14 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		}
 		c->abbrev_parent_hashes.len = sb->len -
 		                              c->abbrev_parent_hashes.off;
-		return;
+		return 1;
 	case 'm':		/* left/right/bottom */
 		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
 		                 ? '-'
 		                 : (commit->object.flags & SYMMETRIC_LEFT)
 		                 ? '<'
 		                 : '>');
-		return;
+		return 1;
 	}

 	/* For the rest we have to parse the commit header. */
@@ -520,66 +537,37 @@ static void format_commit_item(struct strbuf *sb,
 		parse_commit_header(c);

 	switch (placeholder[0]) {
-	case 's':
+	case 's':		/* subject */
 		strbuf_add(sb, msg + c->subject.off, c->subject.len);
-		return;
-	case 'a':
-		format_person_part(sb, placeholder[1],
-		                   msg + c->author.off, c->author.len);
-		return;
-	case 'c':
-		format_person_part(sb, placeholder[1],
-		                   msg + c->committer.off, c->committer.len);
-		return;
-	case 'e':
+		return 1;
+	case 'a':		/* author ... */
+		return format_person_part(sb, placeholder[1],
+		                       msg + c->author.off,
+		                       c->author.len);
+
+	case 'c':		/* committer ... */
+		return format_person_part(sb, placeholder[1],
+		                       msg + c->committer.off,
+		                       c->committer.len);
+
+	case 'e':		/* encoding */
 		strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
-		return;
-	case 'b':
+		return 1;
+	case 'b':		/* body */
 		strbuf_addstr(sb, msg + c->body_off);
-		return;
+		return 1;
 	}
+	return 0;		/* unknown placeholder */
 }

 void format_commit_message(const struct commit *commit,
                            const void *format, struct strbuf *sb)
 {
-	const char *placeholders[] = {
-		"H",		/* commit hash */
-		"h",		/* abbreviated commit hash */
-		"T",		/* tree hash */
-		"t",		/* abbreviated tree hash */
-		"P",		/* parent hashes */
-		"p",		/* abbreviated parent hashes */
-		"an",		/* author name */
-		"ae",		/* author email */
-		"ad",		/* author date */
-		"aD",		/* author date, RFC2822 style */
-		"ar",		/* author date, relative */
-		"at",		/* author date, UNIX timestamp */
-		"ai",		/* author date, ISO 8601 */
-		"cn",		/* committer name */
-		"ce",		/* committer email */
-		"cd",		/* committer date */
-		"cD",		/* committer date, RFC2822 style */
-		"cr",		/* committer date, relative */
-		"ct",		/* committer date, UNIX timestamp */
-		"ci",		/* committer date, ISO 8601 */
-		"e",		/* encoding */
-		"s",		/* subject */
-		"b",		/* body */
-		"Cred",		/* red */
-		"Cgreen",	/* green */
-		"Cblue",	/* blue */
-		"Creset",	/* reset color */
-		"n",		/* newline */
-		"m",		/* left/right/bottom */
-		NULL
-	};
 	struct format_commit_context context;

 	memset(&context, 0, sizeof(context));
 	context.commit = commit;
-	strbuf_expand(sb, format, placeholders, format_commit_item, &context);
+	strbuf_expand(sb, format, format_commit_item, &context);
 }

 static void pp_header(enum cmit_fmt fmt,
diff --git a/strbuf.c b/strbuf.c
index 5efcfc8..32ab8e5 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -146,11 +146,12 @@ void strbuf_addf(struct strbuf *sb,
 	strbuf_setlen(sb, sb->len + len);
 }

-void strbuf_expand(struct strbuf *sb, const char *format,
-                   const char **placeholders, expand_fn_t fn, void *context)
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+                   void *context)
 {
 	for (;;) {
-		const char *percent, **p;
+		const char *percent;
+		size_t consumed;

 		percent = strchrnul(format, '%');
 		strbuf_add(sb, format, percent - format);
@@ -158,14 +159,10 @@ void strbuf_expand(struct strbuf *sb,
 			break;
 		format = percent + 1;

-		for (p = placeholders; *p; p++) {
-			if (!prefixcmp(format, *p))
-				break;
-		}
-		if (*p) {
-			fn(sb, *p, context);
-			format += strlen(*p);
-		} else
+		consumed = fn(sb, format, context);
+		if (consumed)
+			format += consumed;
+		else
 			strbuf_addch(sb, '%');
 	}
 }
diff --git a/strbuf.h b/strbuf.h
index 36d61db..faec229 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -103,8 +103,8 @@ static inline void strbuf_addbuf(struct strbuf *sb,
 }
 extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);

-typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
-extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context);
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);

 __attribute__((format(printf,2,3)))
 extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);

[-- Attachment #2: avoid_prefix_lookup.txt --]
[-- Type: text/plain, Size: 12243 bytes --]

Subject: [PATCH] Avoid a useless prefix lookup in strbuf_expand()

Currently the --pretty=format prefix is looked up in a
tight loop in strbuf_expand(); if found, it is passed as a parameter
to format_commit_item(), which does another search using a
switch statement to select the proper operation according to
the kind of prefix.

Because the switch statement is already able to discard unknown
matches, we don't need the prefix lookup before calling format_commit_item().

This patch removes a useless loop in a very fast path,
used, for example, by 'git log' with the --pretty=format option.

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
---


To apply on top of "[PATCH] Fix an off by one bug in pretty.c"


pretty.c |  228 +++++++++++++++++++++++++++++--------------------------------
 strbuf.c |   19 ++---
 strbuf.h |    4 +-
 3 files changed, 118 insertions(+), 133 deletions(-)

diff --git a/pretty.c b/pretty.c
index 3ce5e6f..5132b1f 100644
--- a/pretty.c
+++ b/pretty.c
@@ -282,76 +282,93 @@ static char *logmsg_reencode(const struct 
 	return out;
 }
 
-static void format_person_part(struct strbuf *sb, char part,
-                               const char *msg, int len)
+static int parse_tz(char *ep, const char *msg, int len) {
+
+	int tz = 0, start = ep - msg + 1;
+
+	for ( ;start < len && isspace(msg[start]); start++)
+		; /* do nothing */
+
+	if (start + 1 < len) {
+		tz = strtoul(msg + start + 1, NULL, 10);
+		if (msg[start] == '-')
+			tz = -tz;
+	}
+	return tz;
+}
+
+static size_t format_person_part(struct strbuf *sb, char part,
+                                 const char *msg, int len)
 {
-	int start, end, tz = 0;
-	unsigned long date;
+	int start, end, tz = 0, date_valid;
+	unsigned long date = 0;
 	char *ep;
 
-	/* parse name */
+	/* advance 'end' to point to name end delimiter */
 	for (end = 0; end < len && msg[end] != '<'; end++)
 		; /* do nothing */
-	start = end + 1;
-	while (end > 0 && isspace(msg[end - 1]))
-		end--;
-	if (part == 'n') {	/* name */
+
+	if (part == 'n') { /* name */
+		while (end > 0 && isspace(msg[end - 1]))
+			end--;
+
 		strbuf_add(sb, msg, end);
-		return;
+		return 2;
 	}
+	start = ++end; /* save email start delimiter */
 
-	if (start >= len)
-		return;
-
-	/* parse email */
-	for (end = start; end < len && msg[end] != '>'; end++)
+	/* advance 'end' to point to email end delimiter */
+	for ( ; end < len && msg[end] != '>'; end++)
 		; /* do nothing */
 
-	if (end >= len)
-		return;
-
-	if (part == 'e') {	/* email */
-		strbuf_add(sb, msg + start, end - start);
-		return;
+	if (part == 'e') { /* email */
+		if (end - start > 0)
+			strbuf_add(sb, msg + start, end - start);
+		return 2;
 	}
 
-	/* parse date */
+	/* advance 'start' to point to date start delimiter */
 	for (start = end + 1; start < len && isspace(msg[start]); start++)
 		; /* do nothing */
-	if (start >= len)
-		return;
-	date = strtoul(msg + start, &ep, 10);
-	if (msg + start == ep)
-		return;
 
-	if (part == 't') {	/* date, UNIX timestamp */
-		strbuf_add(sb, msg + start, ep - (msg + start));
-		return;
-	}
+	date_valid = start < len;
 
-	/* parse tz */
-	for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
-		; /* do nothing */
-	if (start + 1 < len) {
-		tz = strtoul(msg + start + 1, NULL, 10);
-		if (msg[start] == '-')
-			tz = -tz;
+	if (date_valid)
+		date = strtoul(msg + start, &ep, 10);
+
+	if (part == 't') { /* date, UNIX timestamp */
+		if (date_valid && msg + start != ep)
+			strbuf_add(sb, msg + start, ep - (msg + start));
+		return 2;
 	}
 
 	switch (part) {
-	case 'd':	/* date */
-		strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
-		return;
-	case 'D':	/* date, RFC2822 style */
-		strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
-		return;
-	case 'r':	/* date, relative */
-		strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
-		return;
-	case 'i':	/* date, ISO 8601 */
-		strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
-		return;
+	case 'd': /* date */
+		if (date_valid) {
+			tz = parse_tz(ep, msg, len);
+			strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
+		}
+		return 2;
+	case 'D': /* date, RFC2822 style */
+		if (date_valid) {
+			tz = parse_tz(ep, msg, len);
+			strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
+		}
+		return 2;
+	case 'r': /* date, relative */
+		if (date_valid) {
+			tz = parse_tz(ep, msg, len);
+			strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
+		}
+		return 2;
+	case 'i': /* date, ISO 8601 */
+		if (date_valid) {
+			tz = parse_tz(ep, msg, len);
+			strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
+		}
+		return 2;
 	}
+	return 0; /* unknown person part */
 }
 
 struct chunk {
@@ -432,8 +449,8 @@ static void parse_commit_header(struct format_
 	context->commit_header_parsed = 1;
 }
 
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
-                               void *context)
+static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
+                                 void *context)
 {
 	struct format_commit_context *c = context;
 	const struct commit *commit = c->commit;
@@ -443,23 +460,23 @@ static void format_commit_item(struct strbuf *sb, 
 	/* these are independent of the commit */
 	switch (placeholder[0]) {
 	case 'C':
-		switch (placeholder[3]) {
-		case 'd':	/* red */
+		if (!prefixcmp(placeholder + 1, "red")) {
 			strbuf_addstr(sb, "\033[31m");
-			return;
-		case 'e':	/* green */
+			return 4;
+		} else if (!prefixcmp(placeholder + 1, "green")) {
 			strbuf_addstr(sb, "\033[32m");
-			return;
-		case 'u':	/* blue */
+			return 6;
+		} else if (!prefixcmp(placeholder + 1, "blue")) {
 			strbuf_addstr(sb, "\033[34m");
-			return;
-		case 's':	/* reset color */
+			return 5;
+		} else if (!prefixcmp(placeholder + 1, "reset")) {
 			strbuf_addstr(sb, "\033[m");
-			return;
-		}
+			return 6;
+		} else
+			return 0;
 	case 'n':		/* newline */
 		strbuf_addch(sb, '\n');
-		return;
+		return 1;
 	}
 
 	/* these depend on the commit */
@@ -469,34 +486,34 @@ static void format_commit_item(struct strbuf *sb, 
 	switch (placeholder[0]) {
 	case 'H':		/* commit hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
-		return;
+		return 1;
 	case 'h':		/* abbreviated commit hash */
 		if (add_again(sb, &c->abbrev_commit_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
-		return;
+		return 1;
 	case 'T':		/* tree hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
-		return;
+		return 1;
 	case 't':		/* abbreviated tree hash */
 		if (add_again(sb, &c->abbrev_tree_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
-		return;
+		return 1;
 	case 'P':		/* parent hashes */
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
 			strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
 		}
-		return;
+		return 1;
 	case 'p':		/* abbreviated parent hashes */
 		if (add_again(sb, &c->abbrev_parent_hashes))
-			return;
+			return 1;
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
@@ -505,14 +522,14 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		}
 		c->abbrev_parent_hashes.len = sb->len -
 		                              c->abbrev_parent_hashes.off;
-		return;
+		return 1;
 	case 'm':		/* left/right/bottom */
 		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
 		                 ? '-'
 		                 : (commit->object.flags & SYMMETRIC_LEFT)
 		                 ? '<'
 		                 : '>');
-		return;
+		return 1;
 	}
 
 	/* For the rest we have to parse the commit header. */
@@ -520,66 +537,37 @@ static void format_commit_item(struct strbuf *sb,
 		parse_commit_header(c);
 
 	switch (placeholder[0]) {
-	case 's':
+	case 's':		/* subject */
 		strbuf_add(sb, msg + c->subject.off, c->subject.len);
-		return;
-	case 'a':
-		format_person_part(sb, placeholder[1],
-		                   msg + c->author.off, c->author.len);
-		return;
-	case 'c':
-		format_person_part(sb, placeholder[1],
-		                   msg + c->committer.off, c->committer.len);
-		return;
-	case 'e':
+		return 1;
+	case 'a':		/* author ... */
+		return format_person_part(sb, placeholder[1],
+		                       msg + c->author.off,
+		                       c->author.len);
+
+	case 'c':		/* committer ... */
+		return format_person_part(sb, placeholder[1],
+		                       msg + c->committer.off,
+		                       c->committer.len);
+
+	case 'e':		/* encoding */
 		strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
-		return;
-	case 'b':
+		return 1;
+	case 'b':		/* body */
 		strbuf_addstr(sb, msg + c->body_off);
-		return;
+		return 1;
 	}
+	return 0;		/* unknown placeholder */
 }
 
 void format_commit_message(const struct commit *commit,
                            const void *format, struct strbuf *sb)
 {
-	const char *placeholders[] = {
-		"H",		/* commit hash */
-		"h",		/* abbreviated commit hash */
-		"T",		/* tree hash */
-		"t",		/* abbreviated tree hash */
-		"P",		/* parent hashes */
-		"p",		/* abbreviated parent hashes */
-		"an",		/* author name */
-		"ae",		/* author email */
-		"ad",		/* author date */
-		"aD",		/* author date, RFC2822 style */
-		"ar",		/* author date, relative */
-		"at",		/* author date, UNIX timestamp */
-		"ai",		/* author date, ISO 8601 */
-		"cn",		/* committer name */
-		"ce",		/* committer email */
-		"cd",		/* committer date */
-		"cD",		/* committer date, RFC2822 style */
-		"cr",		/* committer date, relative */
-		"ct",		/* committer date, UNIX timestamp */
-		"ci",		/* committer date, ISO 8601 */
-		"e",		/* encoding */
-		"s",		/* subject */
-		"b",		/* body */
-		"Cred",		/* red */
-		"Cgreen",	/* green */
-		"Cblue",	/* blue */
-		"Creset",	/* reset color */
-		"n",		/* newline */
-		"m",		/* left/right/bottom */
-		NULL
-	};
 	struct format_commit_context context;
 
 	memset(&context, 0, sizeof(context));
 	context.commit = commit;
-	strbuf_expand(sb, format, placeholders, format_commit_item, &context);
+	strbuf_expand(sb, format, format_commit_item, &context);
 }
 
 static void pp_header(enum cmit_fmt fmt,
diff --git a/strbuf.c b/strbuf.c
index 5efcfc8..32ab8e5 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -146,11 +146,12 @@ void strbuf_addf(struct strbuf *sb, 
 	strbuf_setlen(sb, sb->len + len);
 }
 
-void strbuf_expand(struct strbuf *sb, const char *format,
-                   const char **placeholders, expand_fn_t fn, void *context)
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+                   void *context)
 {
 	for (;;) {
-		const char *percent, **p;
+		const char *percent;
+		size_t consumed;
 
 		percent = strchrnul(format, '%');
 		strbuf_add(sb, format, percent - format);
@@ -158,14 +159,10 @@ void strbuf_expand(struct strbuf *sb, 
 			break;
 		format = percent + 1;
 
-		for (p = placeholders; *p; p++) {
-			if (!prefixcmp(format, *p))
-				break;
-		}
-		if (*p) {
-			fn(sb, *p, context);
-			format += strlen(*p);
-		} else
+		consumed = fn(sb, format, context);
+		if (consumed)
+			format += consumed;
+		else
 			strbuf_addch(sb, '%');
 	}
 }
diff --git a/strbuf.h b/strbuf.h
index 36d61db..faec229 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -103,8 +103,8 @@ static inline void strbuf_addbuf(struct strbuf *sb, 
 }
 extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
 
-typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
-extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context);
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
 
 __attribute__((format(printf,2,3)))
 extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH] Avoid a useless prefix lookup in strbuf_expand()
@ 2007-12-30 13:46 Marco Costalba
  2008-01-02 18:11 ` René Scharfe
  0 siblings, 1 reply; 6+ messages in thread
From: Marco Costalba @ 2007-12-30 13:46 UTC (permalink / raw)
  To: Git Mailing List; +Cc: Junio C Hamano, Johannes Schindelin

Currently the --pretty=format prefix is looked up in a
tight loop in strbuf_expand(); if found, it is passed as a parameter
to format_commit_item(), which does another search using a
switch statement to select the proper operation according to
the kind of prefix.

Because the switch statement is already able to discard unknown
matches, we don't need the prefix lookup before calling format_commit_item().

This patch removes a useless loop in a very fast path,
used, for example, by 'git log' with the --pretty=format option.

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
---

This patch is somewhat experimental and is not intended to be merged as is.

Here is what is still missing:

- Matching of multi-char prefixes is not 100% reliable. For example, to match
  the prefix "Cgreen", only the first character 'C' and the fourth character
  'e' (placeholder[3]) are checked. This could lead to aliasing in the case of
  malformed prefixes; for example, something like "Cxxexxxx" will match the
  same.


- With this patch the placeholders array defined in format_commit_message()
  becomes useless. That code should be refactored to remove the array and
  perhaps add some stricter checking rules directly inside
  format_commit_item().


Anyhow with this patch we pass from


marco@localhost linux-2.6]$ time git log --topo-order --no-color
--parents -z --log-size --boundary
--pretty=format:"%m%HX%PX%n%an<%ae>%n%at%n%s%n%b" HEAD > /dev/null
2.89user 0.07system 0:02.96elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+27154minor)pagefaults 0swaps


With the super optimized prefixcmp() patch (see the other thread)

to the current

[marco@localhost linux-2.6]$ time git log --topo-order --no-color
--parents -z --log-size --boundary
--pretty=format:"%m%HX%PX%n%an<%ae>%n%at%n%s%n%b" HEAD > /dev/null
2.76user 0.08system 0:02.85elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+27153minor)pagefaults 0swaps


 pretty.c |   43 ++++++++++++++++++++++---------------------
 strbuf.c |   16 +++++++---------
 strbuf.h |    2 +-
 3 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/pretty.c b/pretty.c
index 5b1078b..6225042 100644
--- a/pretty.c
+++ b/pretty.c
@@ -432,7 +432,7 @@ static void parse_commit_header(struct format_commit_context *context)
 	context->commit_header_parsed = 1;
 }

-static void format_commit_item(struct strbuf *sb, const char *placeholder,
+static int format_commit_item(struct strbuf *sb, const char *placeholder,
                                void *context)
 {
 	struct format_commit_context *c = context;
@@ -446,20 +446,20 @@ static void format_commit_item(struct strbuf *sb,
 		switch (placeholder[3]) {
 		case 'd':	/* red */
 			strbuf_addstr(sb, "\033[31m");
-			return;
+			return 4;
 		case 'e':	/* green */
 			strbuf_addstr(sb, "\033[32m");
-			return;
+			return 6;
 		case 'u':	/* blue */
 			strbuf_addstr(sb, "\033[34m");
-			return;
+			return 5;
 		case 's':	/* reset color */
 			strbuf_addstr(sb, "\033[m");
-			return;
+			return 6;
 		}
 	case 'n':		/* newline */
 		strbuf_addch(sb, '\n');
-		return;
+		return 1;
 	}

 	/* these depend on the commit */
@@ -469,34 +469,34 @@ static void format_commit_item(struct strbuf *sb,
 	switch (placeholder[0]) {
 	case 'H':		/* commit hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
-		return;
+		return 1;
 	case 'h':		/* abbreviated commit hash */
 		if (add_again(sb, &c->abbrev_commit_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
-		return;
+		return 1;
 	case 'T':		/* tree hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
-		return;
+		return 1;
 	case 't':		/* abbreviated tree hash */
 		if (add_again(sb, &c->abbrev_tree_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
-		return;
+		return 1;
 	case 'P':		/* parent hashes */
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
 			strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
 		}
-		return;
+		return 1;
 	case 'p':		/* abbreviated parent hashes */
 		if (add_again(sb, &c->abbrev_parent_hashes))
-			return;
+			return 1;
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
@@ -505,14 +505,14 @@ static void format_commit_item(struct strbuf *sb,
 		}
 		c->abbrev_parent_hashes.len = sb->len -
 		                              c->abbrev_parent_hashes.off;
-		return;
+		return 1;
 	case 'm':		/* left/right/bottom */
 		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
 		                 ? '-'
 		                 : (commit->object.flags & SYMMETRIC_LEFT)
 		                 ? '<'
 		                 : '>');
-		return;
+		return 1;
 	}

 	/* For the rest we have to parse the commit header. */
@@ -522,22 +522,23 @@ static void format_commit_item(struct strbuf *sb,
 	switch (placeholder[0]) {
 	case 's':
 		strbuf_add(sb, msg + c->subject.off, c->subject.len);
-		return;
+		return 1;
 	case 'a':
 		format_person_part(sb, placeholder[1],
 		                   msg + c->author.off, c->author.len);
-		return;
+		return 2;
 	case 'c':
 		format_person_part(sb, placeholder[1],
 		                   msg + c->committer.off, c->committer.len);
-		return;
+		return 2;
 	case 'e':
 		strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
-		return;
+		return 1;
 	case 'b':
 		strbuf_addstr(sb, msg + c->body_off);
-		return;
+		return 1;
 	}
+	return 0; /* unknown prefix */
 }

 void format_commit_message(const struct commit *commit,
diff --git a/strbuf.c b/strbuf.c
index b9b194b..3c2a3a7 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -141,7 +141,8 @@ void strbuf_expand(struct strbuf *sb, const char
                    const char **placeholders, expand_fn_t fn, void *context)
 {
 	for (;;) {
-		const char *percent, **p;
+		const char *percent;
+		int prefix_len;

 		percent = strchrnul(format, '%');
 		strbuf_add(sb, format, percent - format);
@@ -149,14 +150,11 @@ void strbuf_expand(struct strbuf *sb, const char
 			break;
 		format = percent + 1;

-		for (p = placeholders; *p; p++) {
-			if (!prefixcmp(format, *p))
-				break;
-		}
-		if (*p) {
-			fn(sb, *p, context);
-			format += strlen(*p);
-		} else
+		prefix_len = fn(sb, format, context);
+
+		if (prefix_len)
+			format += prefix_len;
+		else
 			strbuf_addch(sb, '%');
 	}
 }
diff --git a/strbuf.h b/strbuf.h
index 36d61db..e6d09fc 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -103,7 +103,7 @@ static inline void strbuf_addbuf(struct strbuf *sb,
 }
 extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);

-typedef void (*expand_fn_t) (struct strbuf *sb, const char
*placeholder, void *context);
+typedef int (*expand_fn_t) (struct strbuf *sb, const char
*placeholder, void *context);
 extern void strbuf_expand(struct strbuf *sb, const char *format,
const char **placeholders, expand_fn_t fn, void *context);

 __attribute__((format(printf,2,3)))
-- 
1.5.4.rc2.1.gec59-dirty

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] Avoid a useless prefix lookup in strbuf_expand()
  2007-12-30 13:46 Marco Costalba
@ 2008-01-02 18:11 ` René Scharfe
       [not found]   ` <e5bfff550801021027i6d6a399cob96ae3c840661884@mail.gmail.com>
  0 siblings, 1 reply; 6+ messages in thread
From: René Scharfe @ 2008-01-02 18:11 UTC (permalink / raw)
  To: Marco Costalba; +Cc: Git Mailing List, Junio C Hamano, Johannes Schindelin

Marco Costalba schrieb:
> Currently the --pretty=format prefix is looked up in a
> tight loop in strbuf_expand(); if found, it is passed as a parameter
> to format_commit_item(), which does another search using a
> switch statement to select the proper operation according to
> the kind of prefix.
> 
> Because the switch statement is already able to discard unknown
> matches, we don't need the prefix lookup before calling format_commit_item().
> 
> This patch removes a useless loop in a very fast path,
> used, for example, by 'git log' with the --pretty=format option.
> 
> Signed-off-by: Marco Costalba <mcostalba@gmail.com>
> ---
> 
> This patch is somewhat experimental and is not intended to be merged as is.
> 
> That's what is missing:
> 
> - Matching of multi-char prefixes is not 100% reliable: for example, to match
>   the prefix "Cgreen" only the first char 'C' and the fourth char 'e'
>   (placeholder[3]) are checked. This could lead to aliases in case of
>   malformed prefixes; for example, something like "Cxxexxxx" will match
>   the same.

Well, you need to undo this optimization if you remove the loop that
makes sure that only valid placeholders are passed to the callback
function -- the result would be that you only move the prefixcmp() from
strbuf_expand() into the callbacks.

A better way to speed up strbuf_expand() may be to require the list of
placeholders to be sorted, their count to be passed on and then to
replace the sequential lookup with a binary search.  --pretty=format
currently recognizes 29 placeholders, which might be a high enough
number for a more complicated search method to pay off.

> marco@localhost linux-2.6]$ time git log --topo-order --no-color
> --parents -z --log-size --boundary
> --pretty=format:"%m%HX%PX%n%an<%ae>%n%at%n%s%n%b" HEAD > /dev/null

In your special case it would be even faster to simply reorder the list
by decreasing number of occurrences.  Of course it's hard to guess how
often a particular placeholder is used in the wild, but moving %n from
next-to-last to first place should be a safe bet.

René

^ permalink raw reply	[flat|nested] 6+ messages in thread

[parent not found: <e5bfff550801021027i6d6a399cob96ae3c840661884@mail.gmail.com>]

* Re: [PATCH] Avoid a useless prefix lookup in strbuf_expand()
       [not found]   ` <e5bfff550801021027i6d6a399cob96ae3c840661884@mail.gmail.com>
@ 2008-01-03  0:45     ` René Scharfe
  2008-01-03  9:08       ` Junio C Hamano
  0 siblings, 1 reply; 6+ messages in thread
From: René Scharfe @ 2008-01-03  0:45 UTC (permalink / raw)
  To: Marco Costalba; +Cc: Git Mailing List, Junio C Hamano, Johannes Schindelin

Marco Costalba schrieb:
> If we go for complex stuff then we can bite the bullet and use a text
> matcher state machine, but again, I still think that the best way is
> to avoid the 99% useless loop and use only a (slightly) beefed up
> switch case instead.
> 
> I would think you agree with me that searching if a string is found in
> a string vector then pass the _same_ string to another function that
> (practically) redoes *the same* check using a switch statement is not
> the best optimization one can think about.

The loop makes implementing the callback function a bit easier, since
you don't need to cover all cases; the input is already checked by
strbuf_expand().

Anyway, here's your patch again with a few small changes: the
placeholders array is gone as you suggested, the cases for %Cx, %ax and
%cx are checked for unknown placeholders and the callback function returns
the number of bytes it consumed as size_t.

All in all: less code, slightly more complex callback functions (needs
to return the length of the consumed placeholder or 0 if the input
doesn't match a placeholder) and increased speed.  I have to admit that
I start to like it. :-)

I tried to cover all cases of valid and invalid input; did I miss any?

René


 pretty.c |  143 ++++++++++++++++++++++++++------------------------------------
 strbuf.c |   19 ++++-----
 strbuf.h |    4 +-
 3 files changed, 70 insertions(+), 96 deletions(-)

diff --git a/pretty.c b/pretty.c
index 5b1078b..c0e9c8a 100644
--- a/pretty.c
+++ b/pretty.c
@@ -282,8 +282,8 @@ static char *logmsg_reencode(const struct commit *commit,
 	return out;
 }
 
-static void format_person_part(struct strbuf *sb, char part,
-                               const char *msg, int len)
+static size_t format_person_part(struct strbuf *sb, char part,
+                                 const char *msg, int len)
 {
 	int start, end, tz = 0;
 	unsigned long date;
@@ -297,36 +297,36 @@ static void format_person_part(struct strbuf *sb, char part,
 		end--;
 	if (part == 'n') {	/* name */
 		strbuf_add(sb, msg, end);
-		return;
+		return 1;
 	}
 
 	if (start >= len)
-		return;
+		return 1;
 
 	/* parse email */
 	for (end = start + 1; end < len && msg[end] != '>'; end++)
 		; /* do nothing */
 
 	if (end >= len)
-		return;
+		return 1;
 
 	if (part == 'e') {	/* email */
 		strbuf_add(sb, msg + start, end - start);
-		return;
+		return 1;
 	}
 
 	/* parse date */
 	for (start = end + 1; start < len && isspace(msg[start]); start++)
 		; /* do nothing */
 	if (start >= len)
-		return;
+		return 1;
 	date = strtoul(msg + start, &ep, 10);
 	if (msg + start == ep)
-		return;
+		return 1;
 
 	if (part == 't') {	/* date, UNIX timestamp */
 		strbuf_add(sb, msg + start, ep - (msg + start));
-		return;
+		return 1;
 	}
 
 	/* parse tz */
@@ -341,17 +341,19 @@ static void format_person_part(struct strbuf *sb, char part,
 	switch (part) {
 	case 'd':	/* date */
 		strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
-		return;
+		return 1;
 	case 'D':	/* date, RFC2822 style */
 		strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
-		return;
+		return 1;
 	case 'r':	/* date, relative */
 		strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
-		return;
+		return 1;
 	case 'i':	/* date, ISO 8601 */
 		strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
-		return;
+		return 1;
 	}
+
+	return 0;	/* unknown person part */
 }
 
 struct chunk {
@@ -432,8 +434,8 @@ static void parse_commit_header(struct format_commit_context *context)
 	context->commit_header_parsed = 1;
 }
 
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
-                               void *context)
+static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
+                                 void *context)
 {
 	struct format_commit_context *c = context;
 	const struct commit *commit = c->commit;
@@ -443,23 +445,23 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 	/* these are independent of the commit */
 	switch (placeholder[0]) {
 	case 'C':
-		switch (placeholder[3]) {
-		case 'd':	/* red */
+		if (!prefixcmp(placeholder + 1, "red")) {
 			strbuf_addstr(sb, "\033[31m");
-			return;
-		case 'e':	/* green */
+			return 4;
+		} else if (!prefixcmp(placeholder + 1, "green")) {
 			strbuf_addstr(sb, "\033[32m");
-			return;
-		case 'u':	/* blue */
+			return 6;
+		} else if (!prefixcmp(placeholder + 1, "blue")) {
 			strbuf_addstr(sb, "\033[34m");
-			return;
-		case 's':	/* reset color */
+			return 5;
+		} else if (!prefixcmp(placeholder + 1, "reset")) {
 			strbuf_addstr(sb, "\033[m");
-			return;
-		}
+			return 6;
+		} else
+			return 0;
 	case 'n':		/* newline */
 		strbuf_addch(sb, '\n');
-		return;
+		return 1;
 	}
 
 	/* these depend on the commit */
@@ -469,34 +471,34 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 	switch (placeholder[0]) {
 	case 'H':		/* commit hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
-		return;
+		return 1;
 	case 'h':		/* abbreviated commit hash */
 		if (add_again(sb, &c->abbrev_commit_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
-		return;
+		return 1;
 	case 'T':		/* tree hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
-		return;
+		return 1;
 	case 't':		/* abbreviated tree hash */
 		if (add_again(sb, &c->abbrev_tree_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
-		return;
+		return 1;
 	case 'P':		/* parent hashes */
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
 			strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
 		}
-		return;
+		return 1;
 	case 'p':		/* abbreviated parent hashes */
 		if (add_again(sb, &c->abbrev_parent_hashes))
-			return;
+			return 1;
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
@@ -505,14 +507,14 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		}
 		c->abbrev_parent_hashes.len = sb->len -
 		                              c->abbrev_parent_hashes.off;
-		return;
+		return 1;
 	case 'm':		/* left/right/bottom */
 		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
 		                 ? '-'
 		                 : (commit->object.flags & SYMMETRIC_LEFT)
 		                 ? '<'
 		                 : '>');
-		return;
+		return 1;
 	}
 
 	/* For the rest we have to parse the commit header. */
@@ -520,66 +522,41 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		parse_commit_header(c);
 
 	switch (placeholder[0]) {
-	case 's':
+	case 's':		/* subject */
 		strbuf_add(sb, msg + c->subject.off, c->subject.len);
-		return;
-	case 'a':
-		format_person_part(sb, placeholder[1],
-		                   msg + c->author.off, c->author.len);
-		return;
-	case 'c':
-		format_person_part(sb, placeholder[1],
-		                   msg + c->committer.off, c->committer.len);
-		return;
-	case 'e':
+		return 1;
+	case 'a':		/* author ... */
+		if (format_person_part(sb, placeholder[1],
+		                       msg + c->author.off,
+		                       c->author.len))
+			return 2;
+		else
+			return 0;
+	case 'c':		/* committer ... */
+		if (format_person_part(sb, placeholder[1],
+		                       msg + c->committer.off,
+		                       c->committer.len))
+			return 2;
+		else
+			return 0;
+	case 'e':		/* encoding */
 		strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
-		return;
-	case 'b':
+		return 1;
+	case 'b':		/* body */
 		strbuf_addstr(sb, msg + c->body_off);
-		return;
+		return 1;
 	}
+	return 0;		/* unknown placeholder */
 }
 
 void format_commit_message(const struct commit *commit,
                            const void *format, struct strbuf *sb)
 {
-	const char *placeholders[] = {
-		"H",		/* commit hash */
-		"h",		/* abbreviated commit hash */
-		"T",		/* tree hash */
-		"t",		/* abbreviated tree hash */
-		"P",		/* parent hashes */
-		"p",		/* abbreviated parent hashes */
-		"an",		/* author name */
-		"ae",		/* author email */
-		"ad",		/* author date */
-		"aD",		/* author date, RFC2822 style */
-		"ar",		/* author date, relative */
-		"at",		/* author date, UNIX timestamp */
-		"ai",		/* author date, ISO 8601 */
-		"cn",		/* committer name */
-		"ce",		/* committer email */
-		"cd",		/* committer date */
-		"cD",		/* committer date, RFC2822 style */
-		"cr",		/* committer date, relative */
-		"ct",		/* committer date, UNIX timestamp */
-		"ci",		/* committer date, ISO 8601 */
-		"e",		/* encoding */
-		"s",		/* subject */
-		"b",		/* body */
-		"Cred",		/* red */
-		"Cgreen",	/* green */
-		"Cblue",	/* blue */
-		"Creset",	/* reset color */
-		"n",		/* newline */
-		"m",		/* left/right/bottom */
-		NULL
-	};
 	struct format_commit_context context;
 
 	memset(&context, 0, sizeof(context));
 	context.commit = commit;
-	strbuf_expand(sb, format, placeholders, format_commit_item, &context);
+	strbuf_expand(sb, format, format_commit_item, &context);
 }
 
 static void pp_header(enum cmit_fmt fmt,
diff --git a/strbuf.c b/strbuf.c
index b9b194b..7bb087c 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -137,11 +137,12 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
 	strbuf_setlen(sb, sb->len + len);
 }
 
-void strbuf_expand(struct strbuf *sb, const char *format,
-                   const char **placeholders, expand_fn_t fn, void *context)
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+                   void *context)
 {
 	for (;;) {
-		const char *percent, **p;
+		const char *percent;
+		size_t consumed;
 
 		percent = strchrnul(format, '%');
 		strbuf_add(sb, format, percent - format);
@@ -149,14 +150,10 @@ void strbuf_expand(struct strbuf *sb, const char *format,
 			break;
 		format = percent + 1;
 
-		for (p = placeholders; *p; p++) {
-			if (!prefixcmp(format, *p))
-				break;
-		}
-		if (*p) {
-			fn(sb, *p, context);
-			format += strlen(*p);
-		} else
+		consumed = fn(sb, format, context);
+		if (consumed)
+			format += consumed;
+		else
 			strbuf_addch(sb, '%');
 	}
 }
diff --git a/strbuf.h b/strbuf.h
index 36d61db..faec229 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -103,8 +103,8 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
 }
 extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
 
-typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
-extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context);
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
 
 __attribute__((format(printf,2,3)))
 extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] Avoid a useless prefix lookup in strbuf_expand()
  2008-01-03  0:45     ` René Scharfe
@ 2008-01-03  9:08       ` Junio C Hamano
  2008-01-03 10:06         ` Marco Costalba
  0 siblings, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2008-01-03  9:08 UTC (permalink / raw)
  To: René Scharfe; +Cc: Marco Costalba, Git Mailing List, Johannes Schindelin

René Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> The loop makes implementing the callback function a bit easier, since
> you don't need to cover all cases; the input is already checked by
> strbuf_expand().
>
> Anyway, here's your patch again with a few small changes: the
> placeholders array is gone as you suggested, the cases for %Cx, %ax and
> %cx are checked for unknown placeholders and the callback function returns
> the number of bytes it consumed as size_t.
>
> All in all: less code, slightly more complex callback functions (needs
> to return the length of the consumed placeholder or 0 if the input
> doesn't match a placeholder) and increased speed.  I have to admit that
> I start to like it. :-)

I'll let Marco bench it and hopefully Ack with an updated
(final) commit log message.

I think Dscho and Marco's earlier prefixcmp() optimization to
avoid strlen() can stay, but with "inline" removed.  That should
be equivalent to the version before the optimization, both from
the point of view of the code footprint and callchain length,
but still avoid strlen() cost.

For lack of a better place, the patch below moves it to strbuf.c,
which is probably the closest collection of "stringy" stuff.

$ size git ;# with the attached patch
   text    data     bss     dec     hex filename
 731144   13456  263464 1008064   f61c0 git
$ size ../git.build/git ;# before Dscho's patch
   text    data     bss     dec     hex filename
 731272   13456  263464 1008192   f6240 ../git.build/git
$ size ~/bin/git ;# with Dscho's patch
   text    data     bss     dec     hex filename
 740736   13456  263464 1017656   f8738 /home/junio/bin/git

You earlier said 2,620,938 vs 2,640,450 and I think you meant
what "ls -l" reports.  I suspect it is not a very good measure,
but the numbers here are:

$ ls -l git ;# with the attached patch
-rwxrwxr-x 83 junio src 3345237 2008-01-03 00:53 git
$ ls -l ../git.build/git ;# before Dscho's patch
-rwxrwxr-x 83 junio src 3364803 2008-01-03 01:01 ../git.build/git
$ ls -l ~/bin/git ;# with Dscho's patch
-rwxr-xr-x 83 junio src 3389299 2008-01-02 15:18 /home/junio/bin/git

-- >8 --
Uninline prefixcmp()

Now that the routine is an open-coded loop that avoids the extra
strlen() of the previous implementation, it has become a bit too big
to be inlined.  Uninlining it makes the code footprint smaller, while
the result still avoids the strlen() cost.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-compat-util.h |   11 ++---------
 strbuf.c          |    9 +++++++++
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/git-compat-util.h b/git-compat-util.h
index 7059cbd..b6ef544 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -122,6 +122,8 @@ extern void set_die_routine(void (*routine)(const char *err, va_list params) NOR
 extern void set_error_routine(void (*routine)(const char *err, va_list params));
 extern void set_warn_routine(void (*routine)(const char *warn, va_list params));
 
+extern int prefixcmp(const char *str, const char *prefix);
+
 #ifdef NO_MMAP
 
 #ifndef PROT_READ
@@ -396,15 +398,6 @@ static inline int sane_case(int x, int high)
 	return x;
 }
 
-static inline int prefixcmp(const char *str, const char *prefix)
-{
-	for (; ; str++, prefix++)
-		if (!*prefix)
-			return 0;
-		else if (*str != *prefix)
-			return (unsigned char)*prefix - (unsigned char)*str;
-}
-
 static inline int strtoul_ui(char const *s, int base, unsigned int *result)
 {
 	unsigned long ul;
diff --git a/strbuf.c b/strbuf.c
index b9b194b..5efcfc8 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -1,5 +1,14 @@
 #include "cache.h"
 
+int prefixcmp(const char *str, const char *prefix)
+{
+	for (; ; str++, prefix++)
+		if (!*prefix)
+			return 0;
+		else if (*str != *prefix)
+			return (unsigned char)*prefix - (unsigned char)*str;
+}
+
 /*
  * Used as the default ->buf value, so that people can always assume
  * buf is non NULL and ->buf is NUL terminated even for a freshly

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] Avoid a useless prefix lookup in strbuf_expand()
  2008-01-03  9:08       ` Junio C Hamano
@ 2008-01-03 10:06         ` Marco Costalba
  0 siblings, 0 replies; 6+ messages in thread
From: Marco Costalba @ 2008-01-03 10:06 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: René Scharfe, Git Mailing List, Johannes Schindelin

On Jan 3, 2008 10:08 AM, Junio C Hamano <gitster@pobox.com> wrote:
>
>
> I'll let Marco bench it and hopefully Ack with an updated
> (final) commit log message.
>

I will bench today and post the results.

Marco

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2008-01-06  0:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-06  0:10 [PATCH] Avoid a useless prefix lookup in strbuf_expand() Marco Costalba
  -- strict thread matches above, loose matches on Subject: below --
2007-12-30 13:46 Marco Costalba
2008-01-02 18:11 ` René Scharfe
     [not found]   ` <e5bfff550801021027i6d6a399cob96ae3c840661884@mail.gmail.com>
2008-01-03  0:45     ` René Scharfe
2008-01-03  9:08       ` Junio C Hamano
2008-01-03 10:06         ` Marco Costalba

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).