From mboxrd@z Thu Jan 1 00:00:00 1970 From: =?UTF-8?B?UmVuw6kgU2NoYXJmZQ==?= Subject: Re: [PATCH] Avoid a useless prefix lookup in strbuf_expand() Date: Thu, 03 Jan 2008 01:45:17 +0100 Message-ID: <477C301D.7060509@lsrfire.ath.cx> References: <477BD3B4.2070708@lsrfire.ath.cx> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: Git Mailing List , Junio C Hamano , Johannes Schindelin To: Marco Costalba X-From: git-owner@vger.kernel.org Thu Jan 03 01:46:04 2008 Return-path: Envelope-to: gcvg-git-2@gmane.org Received: from vger.kernel.org ([209.132.176.167]) by lo.gmane.org with esmtp (Exim 4.50) id 1JAEDi-0006si-FT for gcvg-git-2@gmane.org; Thu, 03 Jan 2008 01:46:03 +0100 Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754523AbYACApi convert rfc822-to-quoted-printable (ORCPT ); Wed, 2 Jan 2008 19:45:38 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1754471AbYACApi (ORCPT ); Wed, 2 Jan 2008 19:45:38 -0500 Received: from india601.server4you.de ([85.25.151.105]:58806 "EHLO india601.server4you.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753887AbYACApf (ORCPT ); Wed, 2 Jan 2008 19:45:35 -0500 Received: from [10.0.1.201] (p57B7FA27.dip.t-dialin.net [87.183.250.39]) by india601.server4you.de (Postfix) with ESMTP id D6EBD2F81DA; Thu, 3 Jan 2008 01:45:33 +0100 (CET) User-Agent: Thunderbird 2.0.0.9 (Windows/20071031) In-Reply-To: Sender: git-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: git@vger.kernel.org Archived-At: Marco Costalba schrieb: > If we go on complex stuff then we can bite the bullet and use a text > matcher state machine, but again, I still think that the best way is > to avoid the 99% unusless loop and use only a (slightly) beefed up > switch case instead. >=20 > I would think you agree with me that searching if a string is found i= n > a string vector then pass the _same_ string to another function that > (practically) redoes *the same* check using a switch statement is not > the best optimization one can think about. The loop makes implementing the callback function a bit easier, since you don't need to cover all cases; the input is already checked by strbuf_expand(). Anyway, here's your patch again with a few small changes: the placeholders array is gone as you suggested, the cases for %Cx, %ax and %cx are check for unknown placeholders and the callback function return= s the number of bytes it consumed as size_t. All in all: less code, slightly more complex callback functions (needs to return the length of the consumed placeholder or 0 if the input doesn't match a placeholder) and increased speed. I have to admit that I start to like it. :-) I tried to cover all cases of valid and invalid input; did I miss any? Ren=C3=A9 pretty.c | 143 ++++++++++++++++++++++++++----------------------------= -------- strbuf.c | 19 ++++----- strbuf.h | 4 +- 3 files changed, 70 insertions(+), 96 deletions(-) diff --git a/pretty.c b/pretty.c index 5b1078b..c0e9c8a 100644 --- a/pretty.c +++ b/pretty.c @@ -282,8 +282,8 @@ static char *logmsg_reencode(const struct commit *c= ommit, return out; } =20 -static void format_person_part(struct strbuf *sb, char part, - const char *msg, int len) +static size_t format_person_part(struct strbuf *sb, char part, + const char *msg, int len) { int start, end, tz =3D 0; unsigned long date; @@ -297,36 +297,36 @@ static void format_person_part(struct strbuf *sb,= char part, end--; if (part =3D=3D 'n') { /* name */ strbuf_add(sb, msg, end); - return; + return 1; } =20 if (start >=3D len) - return; + return 1; =20 /* parse email */ for (end =3D start + 1; end < len && msg[end] !=3D '>'; end++) ; /* do nothing */ =20 if (end >=3D len) - return; + return 1; =20 if (part =3D=3D 'e') { /* email */ strbuf_add(sb, msg + start, end - start); - return; + return 1; } =20 /* parse date */ for (start =3D end + 1; start < len && isspace(msg[start]); start++) ; /* do nothing */ if (start >=3D len) - return; + return 1; date =3D strtoul(msg + start, &ep, 10); if (msg + start =3D=3D ep) - return; + return 1; =20 if (part =3D=3D 't') { /* date, UNIX timestamp */ strbuf_add(sb, msg + start, ep - (msg + start)); - return; + return 1; } =20 /* parse tz */ @@ -341,17 +341,19 @@ static void format_person_part(struct strbuf *sb,= char part, switch (part) { case 'd': /* date */ strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL)); - return; + return 1; case 'D': /* date, RFC2822 style */ strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822)); - return; + return 1; case 'r': /* date, relative */ strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE)); - return; + return 1; case 'i': /* date, ISO 8601 */ strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601)); - return; + return 1; } + + return 0; /* unknown person part */ } =20 struct chunk { @@ -432,8 +434,8 @@ static void parse_commit_header(struct format_commi= t_context *context) context->commit_header_parsed =3D 1; } =20 -static void format_commit_item(struct strbuf *sb, const char *placehol= der, - void *context) +static size_t format_commit_item(struct strbuf *sb, const char *placeh= older, + void *context) { struct format_commit_context *c =3D context; const struct commit *commit =3D c->commit; @@ -443,23 +445,23 @@ static void format_commit_item(struct strbuf *sb,= const char *placeholder, /* these are independent of the commit */ switch (placeholder[0]) { case 'C': - switch (placeholder[3]) { - case 'd': /* red */ + if (!prefixcmp(placeholder + 1, "red")) { strbuf_addstr(sb, "\033[31m"); - return; - case 'e': /* green */ + return 4; + } else if (!prefixcmp(placeholder + 1, "green")) { strbuf_addstr(sb, "\033[32m"); - return; - case 'u': /* blue */ + return 6; + } else if (!prefixcmp(placeholder + 1, "blue")) { strbuf_addstr(sb, "\033[34m"); - return; - case 's': /* reset color */ + return 5; + } else if (!prefixcmp(placeholder + 1, "reset")) { strbuf_addstr(sb, "\033[m"); - return; - } + return 6; + } else + return 0; case 'n': /* newline */ strbuf_addch(sb, '\n'); - return; + return 1; } =20 /* these depend on the commit */ @@ -469,34 +471,34 @@ static void format_commit_item(struct strbuf *sb,= const char *placeholder, switch (placeholder[0]) { case 'H': /* commit hash */ strbuf_addstr(sb, sha1_to_hex(commit->object.sha1)); - return; + return 1; case 'h': /* abbreviated commit hash */ if (add_again(sb, &c->abbrev_commit_hash)) - return; + return 1; strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV)); c->abbrev_commit_hash.len =3D sb->len - c->abbrev_commit_hash.off; - return; + return 1; case 'T': /* tree hash */ strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1)); - return; + return 1; case 't': /* abbreviated tree hash */ if (add_again(sb, &c->abbrev_tree_hash)) - return; + return 1; strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1, DEFAULT_ABBREV)); c->abbrev_tree_hash.len =3D sb->len - c->abbrev_tree_hash.off; - return; + return 1; case 'P': /* parent hashes */ for (p =3D commit->parents; p; p =3D p->next) { if (p !=3D commit->parents) strbuf_addch(sb, ' '); strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1)); } - return; + return 1; case 'p': /* abbreviated parent hashes */ if (add_again(sb, &c->abbrev_parent_hashes)) - return; + return 1; for (p =3D commit->parents; p; p =3D p->next) { if (p !=3D commit->parents) strbuf_addch(sb, ' '); @@ -505,14 +507,14 @@ static void format_commit_item(struct strbuf *sb,= const char *placeholder, } c->abbrev_parent_hashes.len =3D sb->len - c->abbrev_parent_hashes.off; - return; + return 1; case 'm': /* left/right/bottom */ strbuf_addch(sb, (commit->object.flags & BOUNDARY) ? '-' : (commit->object.flags & SYMMETRIC_LEFT) ? '<' : '>'); - return; + return 1; } =20 /* For the rest we have to parse the commit header. */ @@ -520,66 +522,41 @@ static void format_commit_item(struct strbuf *sb,= const char *placeholder, parse_commit_header(c); =20 switch (placeholder[0]) { - case 's': + case 's': /* subject */ strbuf_add(sb, msg + c->subject.off, c->subject.len); - return; - case 'a': - format_person_part(sb, placeholder[1], - msg + c->author.off, c->author.len); - return; - case 'c': - format_person_part(sb, placeholder[1], - msg + c->committer.off, c->committer.len); - return; - case 'e': + return 1; + case 'a': /* author ... */ + if (format_person_part(sb, placeholder[1], + msg + c->author.off, + c->author.len)) + return 2; + else + return 0; + case 'c': /* committer ... */ + if (format_person_part(sb, placeholder[1], + msg + c->committer.off, + c->committer.len)) + return 2; + else + return 0; + case 'e': /* encoding */ strbuf_add(sb, msg + c->encoding.off, c->encoding.len); - return; - case 'b': + return 1; + case 'b': /* body */ strbuf_addstr(sb, msg + c->body_off); - return; + return 1; } + return 0; /* unknown placeholder */ } =20 void format_commit_message(const struct commit *commit, const void *format, struct strbuf *sb) { - const char *placeholders[] =3D { - "H", /* commit hash */ - "h", /* abbreviated commit hash */ - "T", /* tree hash */ - "t", /* abbreviated tree hash */ - "P", /* parent hashes */ - "p", /* abbreviated parent hashes */ - "an", /* author name */ - "ae", /* author email */ - "ad", /* author date */ - "aD", /* author date, RFC2822 style */ - "ar", /* author date, relative */ - "at", /* author date, UNIX timestamp */ - "ai", /* author date, ISO 8601 */ - "cn", /* committer name */ - "ce", /* committer email */ - "cd", /* committer date */ - "cD", /* committer date, RFC2822 style */ - "cr", /* committer date, relative */ - "ct", /* committer date, UNIX timestamp */ - "ci", /* committer date, ISO 8601 */ - "e", /* encoding */ - "s", /* subject */ - "b", /* body */ - "Cred", /* red */ - "Cgreen", /* green */ - "Cblue", /* blue */ - "Creset", /* reset color */ - "n", /* newline */ - "m", /* left/right/bottom */ - NULL - }; struct format_commit_context context; =20 memset(&context, 0, sizeof(context)); context.commit =3D commit; - strbuf_expand(sb, format, placeholders, format_commit_item, &context)= ; + strbuf_expand(sb, format, format_commit_item, &context); } =20 static void pp_header(enum cmit_fmt fmt, diff --git a/strbuf.c b/strbuf.c index b9b194b..7bb087c 100644 --- a/strbuf.c +++ b/strbuf.c @@ -137,11 +137,12 @@ void strbuf_addf(struct strbuf *sb, const char *f= mt, ...) strbuf_setlen(sb, sb->len + len); } =20 -void strbuf_expand(struct strbuf *sb, const char *format, - const char **placeholders, expand_fn_t fn, void *co= ntext) +void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t = fn, + void *context) { for (;;) { - const char *percent, **p; + const char *percent; + size_t consumed; =20 percent =3D strchrnul(format, '%'); strbuf_add(sb, format, percent - format); @@ -149,14 +150,10 @@ void strbuf_expand(struct strbuf *sb, const char = *format, break; format =3D percent + 1; =20 - for (p =3D placeholders; *p; p++) { - if (!prefixcmp(format, *p)) - break; - } - if (*p) { - fn(sb, *p, context); - format +=3D strlen(*p); - } else + consumed =3D fn(sb, format, context); + if (consumed) + format +=3D consumed; + else strbuf_addch(sb, '%'); } } diff --git a/strbuf.h b/strbuf.h index 36d61db..faec229 100644 --- a/strbuf.h +++ b/strbuf.h @@ -103,8 +103,8 @@ static inline void strbuf_addbuf(struct strbuf *sb,= struct strbuf *sb2) { } extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); =20 -typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholde= r, void *context); -extern void strbuf_expand(struct strbuf *sb, const char *format, const= char **placeholders, expand_fn_t fn, void *context); +typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placehol= der, void *context); +extern void strbuf_expand(struct strbuf *sb, const char *format, expan= d_fn_t fn, void *context); =20 __attribute__((format(printf,2,3))) extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);