* [PATCH] Avoid a useless prefix lookup in strbuf_expand()
@ 2008-01-06 0:10 Marco Costalba
0 siblings, 0 replies; 6+ messages in thread
From: Marco Costalba @ 2008-01-06 0:10 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Git Mailing List, René Scharfe
[-- Attachment #1: Type: text/plain, Size: 12231 bytes --]
Currently the --pretty=format prefix is looked up in a
tight loop in strbuf_expand() and, if found, is passed as a parameter
to format_commit_item(), which does another search using a
switch statement to select the proper operation according to
the kind of prefix.
Because the switch statement is already able to discard unknown
matches, we don't need the prefix lookup before calling format_commit_item().
This patch removes a useless loop in a very fast path,
used, for example, by 'git log' with the --pretty=format option.
Signed-off-by: Marco Costalba <mcostalba@gmail.com>
---
To apply on top of "[PATCH] Fix an off by one bug in pretty.c"
I am also sending it as an attached file because I think my mailer will
word-wrap this one.
pretty.c | 228 +++++++++++++++++++++++++++++--------------------------------
strbuf.c | 19 ++---
strbuf.h | 4 +-
3 files changed, 118 insertions(+), 133 deletions(-)
diff --git a/pretty.c b/pretty.c
index 3ce5e6f..5132b1f 100644
--- a/pretty.c
+++ b/pretty.c
@@ -282,76 +282,93 @@ static char *logmsg_reencode(const struct
return out;
}
-static void format_person_part(struct strbuf *sb, char part,
- const char *msg, int len)
+static int parse_tz(char *ep, const char *msg, int len) {
+
+ int tz = 0, start = ep - msg + 1;
+
+ for ( ;start < len && isspace(msg[start]); start++)
+ ; /* do nothing */
+
+ if (start + 1 < len) {
+ tz = strtoul(msg + start + 1, NULL, 10);
+ if (msg[start] == '-')
+ tz = -tz;
+ }
+ return tz;
+}
+
+static size_t format_person_part(struct strbuf *sb, char part,
+ const char *msg, int len)
{
- int start, end, tz = 0;
- unsigned long date;
+ int start, end, tz = 0, date_valid;
+ unsigned long date = 0;
char *ep;
- /* parse name */
+ /* advance 'end' to point to name end delimiter */
for (end = 0; end < len && msg[end] != '<'; end++)
; /* do nothing */
- start = end + 1;
- while (end > 0 && isspace(msg[end - 1]))
- end--;
- if (part == 'n') { /* name */
+
+ if (part == 'n') { /* name */
+ while (end > 0 && isspace(msg[end - 1]))
+ end--;
+
strbuf_add(sb, msg, end);
- return;
+ return 2;
}
+ start = ++end; /* save email start delimiter */
- if (start >= len)
- return;
-
- /* parse email */
- for (end = start; end < len && msg[end] != '>'; end++)
+ /* advance 'end' to point to email end delimiter */
+ for ( ; end < len && msg[end] != '>'; end++)
; /* do nothing */
- if (end >= len)
- return;
-
- if (part == 'e') { /* email */
- strbuf_add(sb, msg + start, end - start);
- return;
+ if (part == 'e') { /* email */
+ if (end - start > 0)
+ strbuf_add(sb, msg + start, end - start);
+ return 2;
}
- /* parse date */
+ /* advance 'start' to point to date start delimiter */
for (start = end + 1; start < len && isspace(msg[start]); start++)
; /* do nothing */
- if (start >= len)
- return;
- date = strtoul(msg + start, &ep, 10);
- if (msg + start == ep)
- return;
- if (part == 't') { /* date, UNIX timestamp */
- strbuf_add(sb, msg + start, ep - (msg + start));
- return;
- }
+ date_valid = start < len;
- /* parse tz */
- for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
- ; /* do nothing */
- if (start + 1 < len) {
- tz = strtoul(msg + start + 1, NULL, 10);
- if (msg[start] == '-')
- tz = -tz;
+ if (date_valid)
+ date = strtoul(msg + start, &ep, 10);
+
+ if (part == 't') { /* date, UNIX timestamp */
+ if (date_valid && msg + start != ep)
+ strbuf_add(sb, msg + start, ep - (msg + start));
+ return 2;
}
switch (part) {
- case 'd': /* date */
- strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
- return;
- case 'D': /* date, RFC2822 style */
- strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
- return;
- case 'r': /* date, relative */
- strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
- return;
- case 'i': /* date, ISO 8601 */
- strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
- return;
+ case 'd': /* date */
+ if (date_valid) {
+ tz = parse_tz(ep, msg, len);
+ strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
+ }
+ return 2;
+ case 'D': /* date, RFC2822 style */
+ if (date_valid) {
+ tz = parse_tz(ep, msg, len);
+ strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
+ }
+ return 2;
+ case 'r': /* date, relative */
+ if (date_valid) {
+ tz = parse_tz(ep, msg, len);
+ strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
+ }
+ return 2;
+ case 'i': /* date, ISO 8601 */
+ if (date_valid) {
+ tz = parse_tz(ep, msg, len);
+ strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
+ }
+ return 2;
}
+ return 0; /* unknown person part */
}
struct chunk {
@@ -432,8 +449,8 @@ static void parse_commit_header(struct format_
context->commit_header_parsed = 1;
}
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
- void *context)
+static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
+ void *context)
{
struct format_commit_context *c = context;
const struct commit *commit = c->commit;
@@ -443,23 +460,23 @@ static void format_commit_item(struct strbuf *sb,
/* these are independent of the commit */
switch (placeholder[0]) {
case 'C':
- switch (placeholder[3]) {
- case 'd': /* red */
+ if (!prefixcmp(placeholder + 1, "red")) {
strbuf_addstr(sb, "\033[31m");
- return;
- case 'e': /* green */
+ return 4;
+ } else if (!prefixcmp(placeholder + 1, "green")) {
strbuf_addstr(sb, "\033[32m");
- return;
- case 'u': /* blue */
+ return 6;
+ } else if (!prefixcmp(placeholder + 1, "blue")) {
strbuf_addstr(sb, "\033[34m");
- return;
- case 's': /* reset color */
+ return 5;
+ } else if (!prefixcmp(placeholder + 1, "reset")) {
strbuf_addstr(sb, "\033[m");
- return;
- }
+ return 6;
+ } else
+ return 0;
case 'n': /* newline */
strbuf_addch(sb, '\n');
- return;
+ return 1;
}
/* these depend on the commit */
@@ -469,34 +486,34 @@ static void format_commit_item(struct strbuf *sb,
switch (placeholder[0]) {
case 'H': /* commit hash */
strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
- return;
+ return 1;
case 'h': /* abbreviated commit hash */
if (add_again(sb, &c->abbrev_commit_hash))
- return;
+ return 1;
strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
DEFAULT_ABBREV));
c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
- return;
+ return 1;
case 'T': /* tree hash */
strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
- return;
+ return 1;
case 't': /* abbreviated tree hash */
if (add_again(sb, &c->abbrev_tree_hash))
- return;
+ return 1;
strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
DEFAULT_ABBREV));
c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
- return;
+ return 1;
case 'P': /* parent hashes */
for (p = commit->parents; p; p = p->next) {
if (p != commit->parents)
strbuf_addch(sb, ' ');
strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
}
- return;
+ return 1;
case 'p': /* abbreviated parent hashes */
if (add_again(sb, &c->abbrev_parent_hashes))
- return;
+ return 1;
for (p = commit->parents; p; p = p->next) {
if (p != commit->parents)
strbuf_addch(sb, ' ');
@@ -505,14 +522,14 @@ static void format_commit_item(struct strbuf
*sb, const char *placeholder,
}
c->abbrev_parent_hashes.len = sb->len -
c->abbrev_parent_hashes.off;
- return;
+ return 1;
case 'm': /* left/right/bottom */
strbuf_addch(sb, (commit->object.flags & BOUNDARY)
? '-'
: (commit->object.flags & SYMMETRIC_LEFT)
? '<'
: '>');
- return;
+ return 1;
}
/* For the rest we have to parse the commit header. */
@@ -520,66 +537,37 @@ static void format_commit_item(struct strbuf *sb,
parse_commit_header(c);
switch (placeholder[0]) {
- case 's':
+ case 's': /* subject */
strbuf_add(sb, msg + c->subject.off, c->subject.len);
- return;
- case 'a':
- format_person_part(sb, placeholder[1],
- msg + c->author.off, c->author.len);
- return;
- case 'c':
- format_person_part(sb, placeholder[1],
- msg + c->committer.off, c->committer.len);
- return;
- case 'e':
+ return 1;
+ case 'a': /* author ... */
+ return format_person_part(sb, placeholder[1],
+ msg + c->author.off,
+ c->author.len);
+
+ case 'c': /* committer ... */
+ return format_person_part(sb, placeholder[1],
+ msg + c->committer.off,
+ c->committer.len);
+
+ case 'e': /* encoding */
strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
- return;
- case 'b':
+ return 1;
+ case 'b': /* body */
strbuf_addstr(sb, msg + c->body_off);
- return;
+ return 1;
}
+ return 0; /* unknown placeholder */
}
void format_commit_message(const struct commit *commit,
const void *format, struct strbuf *sb)
{
- const char *placeholders[] = {
- "H", /* commit hash */
- "h", /* abbreviated commit hash */
- "T", /* tree hash */
- "t", /* abbreviated tree hash */
- "P", /* parent hashes */
- "p", /* abbreviated parent hashes */
- "an", /* author name */
- "ae", /* author email */
- "ad", /* author date */
- "aD", /* author date, RFC2822 style */
- "ar", /* author date, relative */
- "at", /* author date, UNIX timestamp */
- "ai", /* author date, ISO 8601 */
- "cn", /* committer name */
- "ce", /* committer email */
- "cd", /* committer date */
- "cD", /* committer date, RFC2822 style */
- "cr", /* committer date, relative */
- "ct", /* committer date, UNIX timestamp */
- "ci", /* committer date, ISO 8601 */
- "e", /* encoding */
- "s", /* subject */
- "b", /* body */
- "Cred", /* red */
- "Cgreen", /* green */
- "Cblue", /* blue */
- "Creset", /* reset color */
- "n", /* newline */
- "m", /* left/right/bottom */
- NULL
- };
struct format_commit_context context;
memset(&context, 0, sizeof(context));
context.commit = commit;
- strbuf_expand(sb, format, placeholders, format_commit_item, &context);
+ strbuf_expand(sb, format, format_commit_item, &context);
}
static void pp_header(enum cmit_fmt fmt,
diff --git a/strbuf.c b/strbuf.c
index 5efcfc8..32ab8e5 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -146,11 +146,12 @@ void strbuf_addf(struct strbuf *sb,
strbuf_setlen(sb, sb->len + len);
}
-void strbuf_expand(struct strbuf *sb, const char *format,
- const char **placeholders, expand_fn_t fn, void *context)
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+ void *context)
{
for (;;) {
- const char *percent, **p;
+ const char *percent;
+ size_t consumed;
percent = strchrnul(format, '%');
strbuf_add(sb, format, percent - format);
@@ -158,14 +159,10 @@ void strbuf_expand(struct strbuf *sb,
break;
format = percent + 1;
- for (p = placeholders; *p; p++) {
- if (!prefixcmp(format, *p))
- break;
- }
- if (*p) {
- fn(sb, *p, context);
- format += strlen(*p);
- } else
+ consumed = fn(sb, format, context);
+ if (consumed)
+ format += consumed;
+ else
strbuf_addch(sb, '%');
}
}
diff --git a/strbuf.h b/strbuf.h
index 36d61db..faec229 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -103,8 +103,8 @@ static inline void strbuf_addbuf(struct strbuf *sb,
}
extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
-typedef void (*expand_fn_t) (struct strbuf *sb, const char
*placeholder, void *context);
-extern void strbuf_expand(struct strbuf *sb, const char *format,
const char **placeholders, expand_fn_t fn, void *context);
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char
*placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format,
expand_fn_t fn, void *context);
__attribute__((format(printf,2,3)))
extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
[-- Attachment #2: avoid_prefix_lookup.txt --]
[-- Type: text/plain, Size: 12243 bytes --]
Subject: [PATCH] Avoid a useless prefix lookup in strbuf_expand()
Currently the --pretty=format prefix is looked up in a
tight loop in strbuf_expand() and, if found, is passed as a parameter
to format_commit_item(), which does another search using a
switch statement to select the proper operation according to
the kind of prefix.
Because the switch statement is already able to discard unknown
matches, we don't need the prefix lookup before calling format_commit_item().
This patch removes a useless loop in a very fast path,
used, for example, by 'git log' with the --pretty=format option.
Signed-off-by: Marco Costalba <mcostalba@gmail.com>
---
To apply on top of "[PATCH] Fix an off by one bug in pretty.c"
pretty.c | 228 +++++++++++++++++++++++++++++--------------------------------
strbuf.c | 19 ++---
strbuf.h | 4 +-
3 files changed, 118 insertions(+), 133 deletions(-)
diff --git a/pretty.c b/pretty.c
index 3ce5e6f..5132b1f 100644
--- a/pretty.c
+++ b/pretty.c
@@ -282,76 +282,93 @@ static char *logmsg_reencode(const struct
return out;
}
-static void format_person_part(struct strbuf *sb, char part,
- const char *msg, int len)
+static int parse_tz(char *ep, const char *msg, int len) {
+
+ int tz = 0, start = ep - msg + 1;
+
+ for ( ;start < len && isspace(msg[start]); start++)
+ ; /* do nothing */
+
+ if (start + 1 < len) {
+ tz = strtoul(msg + start + 1, NULL, 10);
+ if (msg[start] == '-')
+ tz = -tz;
+ }
+ return tz;
+}
+
+static size_t format_person_part(struct strbuf *sb, char part,
+ const char *msg, int len)
{
- int start, end, tz = 0;
- unsigned long date;
+ int start, end, tz = 0, date_valid;
+ unsigned long date = 0;
char *ep;
- /* parse name */
+ /* advance 'end' to point to name end delimiter */
for (end = 0; end < len && msg[end] != '<'; end++)
; /* do nothing */
- start = end + 1;
- while (end > 0 && isspace(msg[end - 1]))
- end--;
- if (part == 'n') { /* name */
+
+ if (part == 'n') { /* name */
+ while (end > 0 && isspace(msg[end - 1]))
+ end--;
+
strbuf_add(sb, msg, end);
- return;
+ return 2;
}
+ start = ++end; /* save email start delimiter */
- if (start >= len)
- return;
-
- /* parse email */
- for (end = start; end < len && msg[end] != '>'; end++)
+ /* advance 'end' to point to email end delimiter */
+ for ( ; end < len && msg[end] != '>'; end++)
; /* do nothing */
- if (end >= len)
- return;
-
- if (part == 'e') { /* email */
- strbuf_add(sb, msg + start, end - start);
- return;
+ if (part == 'e') { /* email */
+ if (end - start > 0)
+ strbuf_add(sb, msg + start, end - start);
+ return 2;
}
- /* parse date */
+ /* advance 'start' to point to date start delimiter */
for (start = end + 1; start < len && isspace(msg[start]); start++)
; /* do nothing */
- if (start >= len)
- return;
- date = strtoul(msg + start, &ep, 10);
- if (msg + start == ep)
- return;
- if (part == 't') { /* date, UNIX timestamp */
- strbuf_add(sb, msg + start, ep - (msg + start));
- return;
- }
+ date_valid = start < len;
- /* parse tz */
- for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
- ; /* do nothing */
- if (start + 1 < len) {
- tz = strtoul(msg + start + 1, NULL, 10);
- if (msg[start] == '-')
- tz = -tz;
+ if (date_valid)
+ date = strtoul(msg + start, &ep, 10);
+
+ if (part == 't') { /* date, UNIX timestamp */
+ if (date_valid && msg + start != ep)
+ strbuf_add(sb, msg + start, ep - (msg + start));
+ return 2;
}
switch (part) {
- case 'd': /* date */
- strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
- return;
- case 'D': /* date, RFC2822 style */
- strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
- return;
- case 'r': /* date, relative */
- strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
- return;
- case 'i': /* date, ISO 8601 */
- strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
- return;
+ case 'd': /* date */
+ if (date_valid) {
+ tz = parse_tz(ep, msg, len);
+ strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
+ }
+ return 2;
+ case 'D': /* date, RFC2822 style */
+ if (date_valid) {
+ tz = parse_tz(ep, msg, len);
+ strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
+ }
+ return 2;
+ case 'r': /* date, relative */
+ if (date_valid) {
+ tz = parse_tz(ep, msg, len);
+ strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
+ }
+ return 2;
+ case 'i': /* date, ISO 8601 */
+ if (date_valid) {
+ tz = parse_tz(ep, msg, len);
+ strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
+ }
+ return 2;
}
+ return 0; /* unknown person part */
}
struct chunk {
@@ -432,8 +449,8 @@ static void parse_commit_header(struct format_
context->commit_header_parsed = 1;
}
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
- void *context)
+static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
+ void *context)
{
struct format_commit_context *c = context;
const struct commit *commit = c->commit;
@@ -443,23 +460,23 @@ static void format_commit_item(struct strbuf *sb,
/* these are independent of the commit */
switch (placeholder[0]) {
case 'C':
- switch (placeholder[3]) {
- case 'd': /* red */
+ if (!prefixcmp(placeholder + 1, "red")) {
strbuf_addstr(sb, "\033[31m");
- return;
- case 'e': /* green */
+ return 4;
+ } else if (!prefixcmp(placeholder + 1, "green")) {
strbuf_addstr(sb, "\033[32m");
- return;
- case 'u': /* blue */
+ return 6;
+ } else if (!prefixcmp(placeholder + 1, "blue")) {
strbuf_addstr(sb, "\033[34m");
- return;
- case 's': /* reset color */
+ return 5;
+ } else if (!prefixcmp(placeholder + 1, "reset")) {
strbuf_addstr(sb, "\033[m");
- return;
- }
+ return 6;
+ } else
+ return 0;
case 'n': /* newline */
strbuf_addch(sb, '\n');
- return;
+ return 1;
}
/* these depend on the commit */
@@ -469,34 +486,34 @@ static void format_commit_item(struct strbuf *sb,
switch (placeholder[0]) {
case 'H': /* commit hash */
strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
- return;
+ return 1;
case 'h': /* abbreviated commit hash */
if (add_again(sb, &c->abbrev_commit_hash))
- return;
+ return 1;
strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
DEFAULT_ABBREV));
c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
- return;
+ return 1;
case 'T': /* tree hash */
strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
- return;
+ return 1;
case 't': /* abbreviated tree hash */
if (add_again(sb, &c->abbrev_tree_hash))
- return;
+ return 1;
strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
DEFAULT_ABBREV));
c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
- return;
+ return 1;
case 'P': /* parent hashes */
for (p = commit->parents; p; p = p->next) {
if (p != commit->parents)
strbuf_addch(sb, ' ');
strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
}
- return;
+ return 1;
case 'p': /* abbreviated parent hashes */
if (add_again(sb, &c->abbrev_parent_hashes))
- return;
+ return 1;
for (p = commit->parents; p; p = p->next) {
if (p != commit->parents)
strbuf_addch(sb, ' ');
@@ -505,14 +522,14 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
}
c->abbrev_parent_hashes.len = sb->len -
c->abbrev_parent_hashes.off;
- return;
+ return 1;
case 'm': /* left/right/bottom */
strbuf_addch(sb, (commit->object.flags & BOUNDARY)
? '-'
: (commit->object.flags & SYMMETRIC_LEFT)
? '<'
: '>');
- return;
+ return 1;
}
/* For the rest we have to parse the commit header. */
@@ -520,66 +537,37 @@ static void format_commit_item(struct strbuf *sb,
parse_commit_header(c);
switch (placeholder[0]) {
- case 's':
+ case 's': /* subject */
strbuf_add(sb, msg + c->subject.off, c->subject.len);
- return;
- case 'a':
- format_person_part(sb, placeholder[1],
- msg + c->author.off, c->author.len);
- return;
- case 'c':
- format_person_part(sb, placeholder[1],
- msg + c->committer.off, c->committer.len);
- return;
- case 'e':
+ return 1;
+ case 'a': /* author ... */
+ return format_person_part(sb, placeholder[1],
+ msg + c->author.off,
+ c->author.len);
+
+ case 'c': /* committer ... */
+ return format_person_part(sb, placeholder[1],
+ msg + c->committer.off,
+ c->committer.len);
+
+ case 'e': /* encoding */
strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
- return;
- case 'b':
+ return 1;
+ case 'b': /* body */
strbuf_addstr(sb, msg + c->body_off);
- return;
+ return 1;
}
+ return 0; /* unknown placeholder */
}
void format_commit_message(const struct commit *commit,
const void *format, struct strbuf *sb)
{
- const char *placeholders[] = {
- "H", /* commit hash */
- "h", /* abbreviated commit hash */
- "T", /* tree hash */
- "t", /* abbreviated tree hash */
- "P", /* parent hashes */
- "p", /* abbreviated parent hashes */
- "an", /* author name */
- "ae", /* author email */
- "ad", /* author date */
- "aD", /* author date, RFC2822 style */
- "ar", /* author date, relative */
- "at", /* author date, UNIX timestamp */
- "ai", /* author date, ISO 8601 */
- "cn", /* committer name */
- "ce", /* committer email */
- "cd", /* committer date */
- "cD", /* committer date, RFC2822 style */
- "cr", /* committer date, relative */
- "ct", /* committer date, UNIX timestamp */
- "ci", /* committer date, ISO 8601 */
- "e", /* encoding */
- "s", /* subject */
- "b", /* body */
- "Cred", /* red */
- "Cgreen", /* green */
- "Cblue", /* blue */
- "Creset", /* reset color */
- "n", /* newline */
- "m", /* left/right/bottom */
- NULL
- };
struct format_commit_context context;
memset(&context, 0, sizeof(context));
context.commit = commit;
- strbuf_expand(sb, format, placeholders, format_commit_item, &context);
+ strbuf_expand(sb, format, format_commit_item, &context);
}
static void pp_header(enum cmit_fmt fmt,
diff --git a/strbuf.c b/strbuf.c
index 5efcfc8..32ab8e5 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -146,11 +146,12 @@ void strbuf_addf(struct strbuf *sb,
strbuf_setlen(sb, sb->len + len);
}
-void strbuf_expand(struct strbuf *sb, const char *format,
- const char **placeholders, expand_fn_t fn, void *context)
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+ void *context)
{
for (;;) {
- const char *percent, **p;
+ const char *percent;
+ size_t consumed;
percent = strchrnul(format, '%');
strbuf_add(sb, format, percent - format);
@@ -158,14 +159,10 @@ void strbuf_expand(struct strbuf *sb,
break;
format = percent + 1;
- for (p = placeholders; *p; p++) {
- if (!prefixcmp(format, *p))
- break;
- }
- if (*p) {
- fn(sb, *p, context);
- format += strlen(*p);
- } else
+ consumed = fn(sb, format, context);
+ if (consumed)
+ format += consumed;
+ else
strbuf_addch(sb, '%');
}
}
diff --git a/strbuf.h b/strbuf.h
index 36d61db..faec229 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -103,8 +103,8 @@ static inline void strbuf_addbuf(struct strbuf *sb,
}
extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
-typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
-extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context);
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
__attribute__((format(printf,2,3)))
extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH] Avoid a useless prefix lookup in strbuf_expand()
@ 2007-12-30 13:46 Marco Costalba
2008-01-02 18:11 ` René Scharfe
0 siblings, 1 reply; 6+ messages in thread
From: Marco Costalba @ 2007-12-30 13:46 UTC (permalink / raw)
To: Git Mailing List; +Cc: Junio C Hamano, Johannes Schindelin
Currently the --pretty=format prefix is looked up in a
tight loop in strbuf_expand() and, if found, is passed as a parameter
to format_commit_item(), which does another search using a
switch statement to select the proper operation according to
the kind of prefix.
Because the switch statement is already able to discard unknown
matches, we don't need the prefix lookup before calling format_commit_item().
This patch removes a useless loop in a very fast path,
used, for example, by 'git log' with the --pretty=format option.
Signed-off-by: Marco Costalba <mcostalba@gmail.com>
---
This patch is somewhat experimental and is not intended to be merged as is.
That's what is missing:
- Matching of multi-char prefixes is not 100% reliable. For example, to match
prefix "Cgreen" only the first char 'C' and the fourth char 'e'
(placeholder[3]) are checked. This could
lead to aliases in case of malformed prefixes; for example, something like
"Cxxexxxx" will match the same.
- With this patch the placeholders array defined in format_commit_message() becomes
useless. That code should be refactored to remove the vector and
perhaps add some
stricter checking rules directly inside format_commit_item().
Anyhow with this patch we pass from
marco@localhost linux-2.6]$ time git log --topo-order --no-color
--parents -z --log-size --boundary
--pretty=format:"%m%HX%PX%n%an<%ae>%n%at%n%s%n%b" HEAD > /dev/null
2.89user 0.07system 0:02.96elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+27154minor)pagefaults 0swaps
With the super optimized prefixcmp() patch (see the other thread)
to the current
[marco@localhost linux-2.6]$ time git log --topo-order --no-color
--parents -z --log-size --boundary
--pretty=format:"%m%HX%PX%n%an<%ae>%n%at%n%s%n%b" HEAD > /dev/null
2.76user 0.08system 0:02.85elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+27153minor)pagefaults 0swaps
pretty.c | 43 ++++++++++++++++++++++---------------------
strbuf.c | 16 +++++++---------
strbuf.h | 2 +-
3 files changed, 30 insertions(+), 31 deletions(-)
diff --git a/pretty.c b/pretty.c
index 5b1078b..6225042 100644
--- a/pretty.c
+++ b/pretty.c
@@ -432,7 +432,7 @@ static void parse_commit_header(struct
format_commit_context *context)
context->commit_header_parsed = 1;
}
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
+static int format_commit_item(struct strbuf *sb, const char *placeholder,
void *context)
{
struct format_commit_context *c = context;
@@ -446,20 +446,20 @@ static void format_commit_item(struct strbuf *sb,
switch (placeholder[3]) {
case 'd': /* red */
strbuf_addstr(sb, "\033[31m");
- return;
+ return 4;
case 'e': /* green */
strbuf_addstr(sb, "\033[32m");
- return;
+ return 6;
case 'u': /* blue */
strbuf_addstr(sb, "\033[34m");
- return;
+ return 5;
case 's': /* reset color */
strbuf_addstr(sb, "\033[m");
- return;
+ return 6;
}
case 'n': /* newline */
strbuf_addch(sb, '\n');
- return;
+ return 1;
}
/* these depend on the commit */
@@ -469,34 +469,34 @@ static void format_commit_item(struct strbuf *sb,
switch (placeholder[0]) {
case 'H': /* commit hash */
strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
- return;
+ return 1;
case 'h': /* abbreviated commit hash */
if (add_again(sb, &c->abbrev_commit_hash))
- return;
+ return 1;
strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
DEFAULT_ABBREV));
c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
- return;
+ return 1;
case 'T': /* tree hash */
strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
- return;
+ return 1;
case 't': /* abbreviated tree hash */
if (add_again(sb, &c->abbrev_tree_hash))
- return;
+ return 1;
strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
DEFAULT_ABBREV));
c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
- return;
+ return 1;
case 'P': /* parent hashes */
for (p = commit->parents; p; p = p->next) {
if (p != commit->parents)
strbuf_addch(sb, ' ');
strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
}
- return;
+ return 1;
case 'p': /* abbreviated parent hashes */
if (add_again(sb, &c->abbrev_parent_hashes))
- return;
+ return 1;
for (p = commit->parents; p; p = p->next) {
if (p != commit->parents)
strbuf_addch(sb, ' ');
@@ -505,14 +505,14 @@ static void format_commit_item(struct strbuf *sb,
}
c->abbrev_parent_hashes.len = sb->len -
c->abbrev_parent_hashes.off;
- return;
+ return 1;
case 'm': /* left/right/bottom */
strbuf_addch(sb, (commit->object.flags & BOUNDARY)
? '-'
: (commit->object.flags & SYMMETRIC_LEFT)
? '<'
: '>');
- return;
+ return 1;
}
/* For the rest we have to parse the commit header. */
@@ -522,22 +522,23 @@ static void format_commit_item(struct strbuf *sb,
switch (placeholder[0]) {
case 's':
strbuf_add(sb, msg + c->subject.off, c->subject.len);
- return;
+ return 1;
case 'a':
format_person_part(sb, placeholder[1],
msg + c->author.off, c->author.len);
- return;
+ return 2;
case 'c':
format_person_part(sb, placeholder[1],
msg + c->committer.off, c->committer.len);
- return;
+ return 2;
case 'e':
strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
- return;
+ return 1;
case 'b':
strbuf_addstr(sb, msg + c->body_off);
- return;
+ return 1;
}
+ return 0; /* unknown prefix */
}
void format_commit_message(const struct commit *commit,
diff --git a/strbuf.c b/strbuf.c
index b9b194b..3c2a3a7 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -141,7 +141,8 @@ void strbuf_expand(struct strbuf *sb, const char
const char **placeholders, expand_fn_t fn, void *context)
{
for (;;) {
- const char *percent, **p;
+ const char *percent;
+ int prefix_len;
percent = strchrnul(format, '%');
strbuf_add(sb, format, percent - format);
@@ -149,14 +150,11 @@ void strbuf_expand(struct strbuf *sb, const char
break;
format = percent + 1;
- for (p = placeholders; *p; p++) {
- if (!prefixcmp(format, *p))
- break;
- }
- if (*p) {
- fn(sb, *p, context);
- format += strlen(*p);
- } else
+ prefix_len = fn(sb, format, context);
+
+ if (prefix_len)
+ format += prefix_len;
+ else
strbuf_addch(sb, '%');
}
}
diff --git a/strbuf.h b/strbuf.h
index 36d61db..e6d09fc 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -103,7 +103,7 @@ static inline void strbuf_addbuf(struct strbuf *sb,
}
extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
-typedef void (*expand_fn_t) (struct strbuf *sb, const char
*placeholder, void *context);
+typedef int (*expand_fn_t) (struct strbuf *sb, const char
*placeholder, void *context);
extern void strbuf_expand(struct strbuf *sb, const char *format,
const char **placeholders, expand_fn_t fn, void *context);
__attribute__((format(printf,2,3)))
--
1.5.4.rc2.1.gec59-dirty
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] Avoid a useless prefix lookup in strbuf_expand()
2007-12-30 13:46 Marco Costalba
@ 2008-01-02 18:11 ` René Scharfe
[not found] ` <e5bfff550801021027i6d6a399cob96ae3c840661884@mail.gmail.com>
0 siblings, 1 reply; 6+ messages in thread
From: René Scharfe @ 2008-01-02 18:11 UTC (permalink / raw)
To: Marco Costalba; +Cc: Git Mailing List, Junio C Hamano, Johannes Schindelin
Marco Costalba schrieb:
> Currently the --prett=format prefix is looked up in a
> tight loop in strbuf_expand(), if found is passed as parameter
> to format_commit_item() that does another search using a
> switch statement to select the proper operation according to
> the kind of prefix.
>
> Because the switch statement is already able to discard unknown
> matches we don't need the prefix lookup before to call format_commit_item()
>
> This patch removes an useless loop in a very fasth path,
> used by, as example, by 'git log' with --pretty=format option
>
> Signed-off-by: Marco Costalba <mcostalba@gmail.com>
> ---
>
> This patch is somewhat experimental and is not intended to be merged as is.
>
> That's what is missing:
>
> - Matching of multi char prefixes is not 100% reliable, as example to match
> prefix "Cgreen" only the first 'C' and the third char 'e' is
> checked, this could
> lead to aliases in case of malformed prefixes, as example something like
> "Cxxexxxx" will match the same.
Well, you need to undo this optimization if you remove the loop that
makes sure that only valid placeholders are passed to the callback
function -- the result would be that you only move the prefixcmp() from
strbuf_expand() into the callbacks.
A better way to speed up strbuf_expand() may be to require the list of
placeholders to be sorted, their count to be passed on and then to
replace the sequential lookup with a binary search. --pretty=format
currently recognizes 29 placeholders, which might be a high enough
number for a more complicated search method to pay off.
> marco@localhost linux-2.6]$ time git log --topo-order --no-color
> --parents -z --log-size --boundary
> --pretty=format:"%m%HX%PX%n%an<%ae>%n%at%n%s%n%b" HEAD > /dev/null
In your special case it would be even faster to simply reorder the list
with decreasing number of occurrence. Of course it's hard to guess how
often a particular placeholder is used in the wild, but moving %n from
next to last to first place should be a safe bet.
René
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2008-01-06 0:10 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-06 0:10 [PATCH] Avoid a useless prefix lookup in strbuf_expand() Marco Costalba
-- strict thread matches above, loose matches on Subject: below --
2007-12-30 13:46 Marco Costalba
2008-01-02 18:11 ` René Scharfe
[not found] ` <e5bfff550801021027i6d6a399cob96ae3c840661884@mail.gmail.com>
2008-01-03 0:45 ` René Scharfe
2008-01-03 9:08 ` Junio C Hamano
2008-01-03 10:06 ` Marco Costalba
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).