git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH RESEND] Avoid a useless prefix lookup in strbuf_expand()
@ 2008-02-02 11:09 Marco Costalba
  2008-02-03 21:53 ` Junio C Hamano
  2008-02-05  7:00 ` Junio C Hamano
  0 siblings, 2 replies; 5+ messages in thread
From: Marco Costalba @ 2008-02-02 11:09 UTC (permalink / raw)
  To: gitster; +Cc: git, Marco Costalba

Currently the --prett=format prefix is looked up in a
tight loop in strbuf_expand(), if prefix is found is then
used as argument for format_commit_item() that does another
search by a switch statement to select the proper operation.

Because the switch statement is already able to discard
unknown matches we don't need the prefix lookup before
to call format_commit_item()

This patch removes an useless loop in a very fast path,
used by, as example, by 'git log' with --pretty=format option

Signed-off-by: Marco Costalba <mcostalba@gmail.com>
---

This is the resend of an optimization patch rebased on
current next.

I resend now that branch is open again.

 pretty.c |  202 +++++++++++++++++++++++++++----------------------------------
 strbuf.c |   19 +++----
 strbuf.h |    4 +-
 3 files changed, 99 insertions(+), 126 deletions(-)

diff --git a/pretty.c b/pretty.c
index b987ff2..64ead65 100644
--- a/pretty.c
+++ b/pretty.c
@@ -282,16 +282,18 @@ static char *logmsg_reencode(const struct commit *commit,
 	return out;
 }
 
-static void format_person_part(struct strbuf *sb, char part,
+/* returns placeholder length or 0 if placeholder is not known */
+static size_t format_person_part(struct strbuf *sb, char part,
                                const char *msg, int len)
 {
-	int start, end, tz = 0;
-	unsigned long date;
+	int start, end, tz = 0, end_of_data;
+	unsigned long date = 0;
 	char *ep;
 
-	/* parse name */
+	/* advance 'end' to point to email start delimiter */
 	for (end = 0; end < len && msg[end] != '<'; end++)
 		; /* do nothing */
+
 	/*
 	 * If it does not even have a '<' and '>', that is
 	 * quite a bogus commit author and we discard it;
@@ -301,65 +303,72 @@ static void format_person_part(struct strbuf *sb, char part,
 	 * which means start (beginning of email address) must
 	 * be strictly below len.
 	 */
-	start = end + 1;
-	if (start >= len - 1)
-		return;
-	while (end > 0 && isspace(msg[end - 1]))
-		end--;
+	end_of_data = (end >= len - 1);
+
 	if (part == 'n') {	/* name */
-		strbuf_add(sb, msg, end);
-		return;
+		if (!end_of_data) {
+			while (end > 0 && isspace(msg[end - 1]))
+				end--;
+			strbuf_add(sb, msg, end);
+		}
+		return 2;
 	}
+	start = ++end; /* save email start position */
 
-	/* parse email */
-	for (end = start; end < len && msg[end] != '>'; end++)
+	/* advance 'end' to point to email end delimiter */
+	for ( ; end < len && msg[end] != '>'; end++)
 		; /* do nothing */
 
-	if (end >= len)
-		return;
+	end_of_data = (end >= len);
 
 	if (part == 'e') {	/* email */
-		strbuf_add(sb, msg + start, end - start);
-		return;
+		if (!end_of_data)
+			strbuf_add(sb, msg + start, end - start);
+		return 2;
 	}
-
-	/* parse date */
+	/* advance 'start' to point to date start delimiter */
 	for (start = end + 1; start < len && isspace(msg[start]); start++)
 		; /* do nothing */
-	if (start >= len)
-		return;
-	date = strtoul(msg + start, &ep, 10);
-	if (msg + start == ep)
-		return;
+
+	end_of_data = (start >= len);
+
+	if (!end_of_data)
+		date = strtoul(msg + start, &ep, 10);
 
 	if (part == 't') {	/* date, UNIX timestamp */
-		strbuf_add(sb, msg + start, ep - (msg + start));
-		return;
+		if (!end_of_data)
+			strbuf_add(sb, msg + start, ep - (msg + start));
+		return 2;
 	}
+	if (!end_of_data) {	/* parse tz */
+		for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
+			; /* do nothing */
 
-	/* parse tz */
-	for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
-		; /* do nothing */
-	if (start + 1 < len) {
-		tz = strtoul(msg + start + 1, NULL, 10);
-		if (msg[start] == '-')
-			tz = -tz;
+		if (start + 1 < len) {
+			tz = strtoul(msg + start + 1, NULL, 10);
+			if (msg[start] == '-')
+				tz = -tz;
+		}
 	}
-
 	switch (part) {
-	case 'd':	/* date */
-		strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
-		return;
-	case 'D':	/* date, RFC2822 style */
-		strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
-		return;
-	case 'r':	/* date, relative */
-		strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
-		return;
-	case 'i':	/* date, ISO 8601 */
-		strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
-		return;
-	}
+	case 'd': /* date */
+		if (!end_of_data)
+			strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
+		return 2;
+	case 'D': /* date, RFC2822 style */
+		if (!end_of_data)
+			strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
+		return 2;
+	case 'r': /* date, relative */
+		if (!end_of_data)
+			strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
+		return 2;
+	case 'i': /* date, ISO 8601 */
+		if (!end_of_data)
+			strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
+		return 2;
+	}
+	return 0; /* unknown placeholder */
 }
 
 struct chunk {
@@ -440,7 +449,7 @@ static void parse_commit_header(struct format_commit_context *context)
 	context->commit_header_parsed = 1;
 }
 
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
                                void *context)
 {
 	struct format_commit_context *c = context;
@@ -451,23 +460,23 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 	/* these are independent of the commit */
 	switch (placeholder[0]) {
 	case 'C':
-		switch (placeholder[3]) {
-		case 'd':	/* red */
+		if (!prefixcmp(placeholder + 1, "red")) {
 			strbuf_addstr(sb, "\033[31m");
-			return;
-		case 'e':	/* green */
+			return 4;
+		} else if (!prefixcmp(placeholder + 1, "green")) {
 			strbuf_addstr(sb, "\033[32m");
-			return;
-		case 'u':	/* blue */
+			return 6;
+		} else if (!prefixcmp(placeholder + 1, "blue")) {
 			strbuf_addstr(sb, "\033[34m");
-			return;
-		case 's':	/* reset color */
+			return 5;
+		} else if (!prefixcmp(placeholder + 1, "reset")) {
 			strbuf_addstr(sb, "\033[m");
-			return;
-		}
+			return 6;
+		} else
+			return 0;
 	case 'n':		/* newline */
 		strbuf_addch(sb, '\n');
-		return;
+		return 1;
 	}
 
 	/* these depend on the commit */
@@ -477,34 +486,34 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 	switch (placeholder[0]) {
 	case 'H':		/* commit hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
-		return;
+		return 1;
 	case 'h':		/* abbreviated commit hash */
 		if (add_again(sb, &c->abbrev_commit_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
-		return;
+		return 1;
 	case 'T':		/* tree hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
-		return;
+		return 1;
 	case 't':		/* abbreviated tree hash */
 		if (add_again(sb, &c->abbrev_tree_hash))
-			return;
+			return 1;
 		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
 		                                     DEFAULT_ABBREV));
 		c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
-		return;
+		return 1;
 	case 'P':		/* parent hashes */
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
 			strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
 		}
-		return;
+		return 1;
 	case 'p':		/* abbreviated parent hashes */
 		if (add_again(sb, &c->abbrev_parent_hashes))
-			return;
+			return 1;
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
@@ -513,14 +522,14 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		}
 		c->abbrev_parent_hashes.len = sb->len -
 		                              c->abbrev_parent_hashes.off;
-		return;
+		return 1;
 	case 'm':		/* left/right/bottom */
 		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
 		                 ? '-'
 		                 : (commit->object.flags & SYMMETRIC_LEFT)
 		                 ? '<'
 		                 : '>');
-		return;
+		return 1;
 	}
 
 	/* For the rest we have to parse the commit header. */
@@ -528,66 +537,33 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		parse_commit_header(c);
 
 	switch (placeholder[0]) {
-	case 's':
+	case 's':	/* subject */
 		strbuf_add(sb, msg + c->subject.off, c->subject.len);
-		return;
-	case 'a':
-		format_person_part(sb, placeholder[1],
+		return 1;
+	case 'a':	/* author ... */
+		return format_person_part(sb, placeholder[1],
 		                   msg + c->author.off, c->author.len);
-		return;
-	case 'c':
-		format_person_part(sb, placeholder[1],
+	case 'c':	/* committer ... */
+		return format_person_part(sb, placeholder[1],
 		                   msg + c->committer.off, c->committer.len);
-		return;
-	case 'e':
+	case 'e':	/* encoding */
 		strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
-		return;
-	case 'b':
+		return 1;
+	case 'b':	/* body */
 		strbuf_addstr(sb, msg + c->body_off);
-		return;
+		return 1;
 	}
+	return 0;	/* unknown placeholder */
 }
 
 void format_commit_message(const struct commit *commit,
                            const void *format, struct strbuf *sb)
 {
-	const char *placeholders[] = {
-		"H",		/* commit hash */
-		"h",		/* abbreviated commit hash */
-		"T",		/* tree hash */
-		"t",		/* abbreviated tree hash */
-		"P",		/* parent hashes */
-		"p",		/* abbreviated parent hashes */
-		"an",		/* author name */
-		"ae",		/* author email */
-		"ad",		/* author date */
-		"aD",		/* author date, RFC2822 style */
-		"ar",		/* author date, relative */
-		"at",		/* author date, UNIX timestamp */
-		"ai",		/* author date, ISO 8601 */
-		"cn",		/* committer name */
-		"ce",		/* committer email */
-		"cd",		/* committer date */
-		"cD",		/* committer date, RFC2822 style */
-		"cr",		/* committer date, relative */
-		"ct",		/* committer date, UNIX timestamp */
-		"ci",		/* committer date, ISO 8601 */
-		"e",		/* encoding */
-		"s",		/* subject */
-		"b",		/* body */
-		"Cred",		/* red */
-		"Cgreen",	/* green */
-		"Cblue",	/* blue */
-		"Creset",	/* reset color */
-		"n",		/* newline */
-		"m",		/* left/right/bottom */
-		NULL
-	};
 	struct format_commit_context context;
 
 	memset(&context, 0, sizeof(context));
 	context.commit = commit;
-	strbuf_expand(sb, format, placeholders, format_commit_item, &context);
+	strbuf_expand(sb, format, format_commit_item, &context);
 }
 
 static void pp_header(enum cmit_fmt fmt,
diff --git a/strbuf.c b/strbuf.c
index 5efcfc8..32ab8e5 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -146,11 +146,12 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
 	strbuf_setlen(sb, sb->len + len);
 }
 
-void strbuf_expand(struct strbuf *sb, const char *format,
-                   const char **placeholders, expand_fn_t fn, void *context)
+void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn,
+                   void *context)
 {
 	for (;;) {
-		const char *percent, **p;
+		const char *percent;
+		size_t consumed;
 
 		percent = strchrnul(format, '%');
 		strbuf_add(sb, format, percent - format);
@@ -158,14 +159,10 @@ void strbuf_expand(struct strbuf *sb, const char *format,
 			break;
 		format = percent + 1;
 
-		for (p = placeholders; *p; p++) {
-			if (!prefixcmp(format, *p))
-				break;
-		}
-		if (*p) {
-			fn(sb, *p, context);
-			format += strlen(*p);
-		} else
+		consumed = fn(sb, format, context);
+		if (consumed)
+			format += consumed;
+		else
 			strbuf_addch(sb, '%');
 	}
 }
diff --git a/strbuf.h b/strbuf.h
index 36d61db..faec229 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -103,8 +103,8 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
 }
 extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
 
-typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
-extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context);
+typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context);
 
 __attribute__((format(printf,2,3)))
 extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
-- 
1.5.4.rc4.39.g524a

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] Avoid a useless prefix lookup in strbuf_expand()
  2008-02-02 11:09 [PATCH RESEND] Avoid a useless prefix lookup in strbuf_expand() Marco Costalba
@ 2008-02-03 21:53 ` Junio C Hamano
  2008-02-03 22:22   ` Marco Costalba
  2008-02-05  7:00 ` Junio C Hamano
  1 sibling, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2008-02-03 21:53 UTC (permalink / raw)
  To: Marco Costalba; +Cc: gitster, git

Marco Costalba <mcostalba@gmail.com> writes:

> diff --git a/pretty.c b/pretty.c
> index b987ff2..64ead65 100644
> --- a/pretty.c
> +++ b/pretty.c
> @@ -282,16 +282,18 @@ static char *logmsg_reencode(const struct commit *commit,
>  	return out;
>  }
>  
> -static void format_person_part(struct strbuf *sb, char part,
> +/* returns placeholder length or 0 if placeholder is not known */

That "return placeholder length" is a bit confusing, and I suspect
the reason may be because the interface is misdesigned.

This function gets only a single character "part" and adds the
matching information to sb if found, otherwise it doesn't, so
the only possible return values are 0 or 2.

Wouldn't it be much cleaner if this returned a bool that says "I
found and substituted that 'part' you asked me to handle"?

> +static size_t format_person_part(struct strbuf *sb, char part,
>                                 const char *msg, int len)
>  {
> -	int start, end, tz = 0;
> -	unsigned long date;
> +	int start, end, tz = 0, end_of_data;
> +	unsigned long date = 0;
>  	char *ep;
>  
> -	/* parse name */
> +	/* advance 'end' to point to email start delimiter */
>  	for (end = 0; end < len && msg[end] != '<'; end++)
>  		; /* do nothing */
> +
>  	/*
>  	 * If it does not even have a '<' and '>', that is
>  	 * quite a bogus commit author and we discard it;
> @@ -301,65 +303,72 @@ static void format_person_part(struct strbuf *sb, char part,
>  	 * which means start (beginning of email address) must
>  	 * be strictly below len.
>  	 */
> -	start = end + 1;
> -	if (start >= len - 1)
> -		return;
> -	while (end > 0 && isspace(msg[end - 1]))
> -		end--;

The comment you can see in the context seems to refer to the
logic implemented by the part you are rewriting.  Don't you need
to update it?  Also the ealier part of the same comment talks
about safety against a malformed input and explains the "return;"
you are removing here.  It is not clear where that logic has
gone...

> +	end_of_data = (end >= len - 1);
> +

The variable name "end_of_data" is unclear.  What does this
boolean mean?  The line is without address and timestamp?
The item you are parsing is not properly terminated?

>  	if (part == 'n') {	/* name */
> -		strbuf_add(sb, msg, end);
> -		return;
> +		if (!end_of_data) {
> +			while (end > 0 && isspace(msg[end - 1]))
> +				end--;
> +			strbuf_add(sb, msg, end);
> +		}
> +		return 2;
>  	}
> +	start = ++end; /* save email start position */

What happens if end_of_data was already true in this case, I
have to wonder...  Language lawyers may point out that the
result of ++end would be undefined, which I do not personally
care about in this case, but this feels dirty if not wrong.

> @@ -451,23 +460,23 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
>  	/* these are independent of the commit */
>  	switch (placeholder[0]) {
>  	case 'C':
> -		switch (placeholder[3]) {
> -		case 'd':	/* red */
> +		if (!prefixcmp(placeholder + 1, "red")) {
>  			strbuf_addstr(sb, "\033[31m");
> -			return;
> -		case 'e':	/* green */
> +			return 4;
> +		} else if (!prefixcmp(placeholder + 1, "green")) {
>  			strbuf_addstr(sb, "\033[32m");
> -			return;
> -		case 'u':	/* blue */
> +			return 6;
> +		} else if (!prefixcmp(placeholder + 1, "blue")) {
>  			strbuf_addstr(sb, "\033[34m");
> -			return;
> -		case 's':	/* reset color */
> +			return 5;
> +		} else if (!prefixcmp(placeholder + 1, "reset")) {
>  			strbuf_addstr(sb, "\033[m");
> -			return;
> -		}
> +			return 6;
> +		} else
> +			return 0;

While these look much cleaner than using the magic "check the
third letter that happens to be unique" hack, the return values
can easily go out-of-sync.  I'd suggest to have a static array
of color names you support and iterate over it.

> @@ -528,66 +537,33 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
>  ...
>  void format_commit_message(const struct commit *commit,
>                             const void *format, struct strbuf *sb)
>  {
> -	const char *placeholders[] = {
> -		"H",		/* commit hash */
> ...
> -		"n",		/* newline */
> -		"m",		/* left/right/bottom */
> -		NULL
> -	};
>  	struct format_commit_context context;
>  
>  	memset(&context, 0, sizeof(context));
>  	context.commit = commit;
> -	strbuf_expand(sb, format, placeholders, format_commit_item, &context);
> +	strbuf_expand(sb, format, format_commit_item, &context);
>  }

This is much nicer.  We reduced duplicated data from our code.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] Avoid a useless prefix lookup in strbuf_expand()
  2008-02-03 21:53 ` Junio C Hamano
@ 2008-02-03 22:22   ` Marco Costalba
  0 siblings, 0 replies; 5+ messages in thread
From: Marco Costalba @ 2008-02-03 22:22 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Feb 3, 2008 10:53 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Marco Costalba <mcostalba@gmail.com> writes:
>
> >
> > -static void format_person_part(struct strbuf *sb, char part,
> > +/* returns placeholder length or 0 if placeholder is not known */
>
> That "return placeholder length" is a bit confusing, and I suspect
> the reason may be because the interface is misdesigned.
>
> This function gets only a single character "part" and adds the
> matching information to sb if found, otherwise it doesn't, so
> the only possible return values are 0 or 2.
>
> Wouldn't it be much cleaner if this returned a bool that says "I
> found and substituted that 'part' you asked me to handle"?
>

It happens that _currently_ placeholders that start with 'a' or 'c'
have length of 2 chars, so return value can be only 0 or 2, but this
is by accident, the return value is really the length of parsed
placeholder. Indeed if you see the return value of the caller
format_commit_item() is also the length of the parsed placeholder and
so should be our format_person_part() whom return value is forwarded
by caller:

from format_commit_item()

	case 'a':	/* author ... */
		return format_person_part(sb, placeholder[1],
		                   msg + c->author.off, c->author.len);



> >       /*
> >        * If it does not even have a '<' and '>', that is
> >        * quite a bogus commit author and we discard it;
> > @@ -301,65 +303,72 @@ static void format_person_part(struct strbuf *sb, char part,
> >        * which means start (beginning of email address) must
> >        * be strictly below len.
> >        */
> > -     start = end + 1;
> > -     if (start >= len - 1)
> > -             return;
> > -     while (end > 0 && isspace(msg[end - 1]))
> > -             end--;
>
> The comment you can see in the context seems to refer to the
> logic implemented by the part you are rewriting.  Don't you need
> to update it?

Why? if I had written

end_of_data = (end > len - 1);

instead of

end_of_data = (end >= len - 1);

I agree with you comment would have been obsoleted but is not the case.

>
> > +     end_of_data = (end >= len - 1);
> > +
>
> The variable name "end_of_data" is unclear.  What does this
> boolean mean?  The line is without address and timestamp?
> The item you are parsing is not properly terminated?
>

It means "we have nothing more to parse here" and we _could_ return
now but we keep on because we need to know if the part placeholder is
valid or is unkown, so we really need to go through following switch
statement before to return with a correct return value.


> >       if (part == 'n') {      /* name */
> > -             strbuf_add(sb, msg, end);
> > -             return;
> > +             if (!end_of_data) {
> > +                     while (end > 0 && isspace(msg[end - 1]))
> > +                             end--;
> > +                     strbuf_add(sb, msg, end);
> > +             }
> > +             return 2;
> >       }
> > +     start = ++end; /* save email start position */
>
> What happens if end_of_data was already true in this case, I
> have to wonder...  Language lawyers may point out that the
> result of ++end would be undefined, which I do not personally
> care about in this case, but this feels dirty if not wrong.
>

Could be dirty, but is not wrong because when end_of_data flag as is
set as is the case, any use of 'end' variable is skipped, only the
following swicth statement is evaluated with the only purpose to
compute a valid return value.


Thanks for your review
Marco

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] Avoid a useless prefix lookup in strbuf_expand()
  2008-02-02 11:09 [PATCH RESEND] Avoid a useless prefix lookup in strbuf_expand() Marco Costalba
  2008-02-03 21:53 ` Junio C Hamano
@ 2008-02-05  7:00 ` Junio C Hamano
  2008-02-05 13:17   ` Marco Costalba
  1 sibling, 1 reply; 5+ messages in thread
From: Junio C Hamano @ 2008-02-05  7:00 UTC (permalink / raw)
  To: Marco Costalba; +Cc: git

Marco Costalba <mcostalba@gmail.com> writes:

> Currently the --prett=format prefix is looked up in a
> ...
> diff --git a/pretty.c b/pretty.c
> index b987ff2..64ead65 100644
> --- a/pretty.c
> +++ b/pretty.c
> @@ -282,16 +282,18 @@ static char *logmsg_reencode(const struct commit *commit,
>  	return out;
>  }
>  
> -static void format_person_part(struct strbuf *sb, char part,
> +/* returns placeholder length or 0 if placeholder is not known */
> +static size_t format_person_part(struct strbuf *sb, char part,
>                                 const char *msg, int len)
>  {
> -	int start, end, tz = 0;
> -	unsigned long date;
> +	int start, end, tz = 0, end_of_data;
> +	unsigned long date = 0;
>  	char *ep;
>  
> -	/* parse name */
> +	/* advance 'end' to point to email start delimiter */
>  	for (end = 0; end < len && msg[end] != '<'; end++)
>  		; /* do nothing */
> +

Another thing I noticed is that --pretty='format:%an %ae %at'
would end up running this function three times.  Perhaps it is
worth memoizing the result in format_commit_context while you
are at it?

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND] Avoid a useless prefix lookup in strbuf_expand()
  2008-02-05  7:00 ` Junio C Hamano
@ 2008-02-05 13:17   ` Marco Costalba
  0 siblings, 0 replies; 5+ messages in thread
From: Marco Costalba @ 2008-02-05 13:17 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Feb 5, 2008 8:00 AM, Junio C Hamano <gitster@pobox.com> wrote:
>
> Another thing I noticed is that --pretty='format:%an %ae %at'
> would end up running this function three times.  Perhaps it is
> worth memoizing the result in format_commit_context while you
> are at it?
>

Yes, I tought about this. But at the end I rejected this idea to keep
the code simple and because the optimization was in any case small
(and also in a special cases only).

I did some profiling with and without my patch and I noticed a nice
improvement so that with the patch applied git-rev-list and git-log
--pretty=format have almost the same execution times.

This is because the biggest time in both cases is spent in zlib
decompress, so also a local further speed up of  format_person_part()
does not gain anything but complicates the code.

Marco

P.S: I will try to adress your concerns on the naming of end_of_data
flag (hints are welcomed, I'm bad at naming) and on the skipping
further elaborations if end_of_data is true to have cleaner code, I
was thinking about using 'goto xxxx' or something like that.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-02-05 13:18 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-02-02 11:09 [PATCH RESEND] Avoid a useless prefix lookup in strbuf_expand() Marco Costalba
2008-02-03 21:53 ` Junio C Hamano
2008-02-03 22:22   ` Marco Costalba
2008-02-05  7:00 ` Junio C Hamano
2008-02-05 13:17   ` Marco Costalba

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).