[PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders
@ 2007-11-04 11:49 René Scharfe
  2007-11-04 14:08 ` [PATCH 7/5] pretty describe: add min_prio parameter to describe_commit() René Scharfe
  2007-11-04 14:11 ` [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders Johannes Schindelin
  0 siblings, 2 replies; 7+ messages in thread
From: René Scharfe @ 2007-11-04 11:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List

The new placeholders %ds (description string, git-describe style), %dn
(the name part) and %dd (the depth part) are added.

To avoid imposing the significant cost of running describe_commit() on
every format string, even if none of the new placeholders are used, a
new function, interp_count(), is introduced.  It is a stripped down
version of interpolate(), that simply counts the placeholders in a
format string.  If the describe placeholders are not found, setting up
the corresponding replacements is skipped.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---
 Documentation/pretty-formats.txt |    3 +++
 commit.c                         |   38 ++++++++++++++++++++++++++++++++++++++
 interpolate.c                    |   36 ++++++++++++++++++++++++++++++++++++
 interpolate.h                    |    2 ++
 t/t6120-describe.sh              |   22 ++++++++++++++++++++++
 5 files changed, 101 insertions(+), 0 deletions(-)

diff --git a/Documentation/pretty-formats.txt b/Documentation/pretty-formats.txt
index 0193c3c..ec86415 100644
--- a/Documentation/pretty-formats.txt
+++ b/Documentation/pretty-formats.txt
@@ -100,6 +100,9 @@ The placeholders are:
 - '%t': abbreviated tree hash
 - '%P': parent hashes
 - '%p': abbreviated parent hashes
+- '%ds': description
+- '%dn': tag name part of the description
+- '%dd': depth part of the description
 - '%an': author name
 - '%ae': author email
 - '%ad': author date
diff --git a/commit.c b/commit.c
index 2e52a2f..06d5cec 100644
--- a/commit.c
+++ b/commit.c
@@ -781,6 +781,9 @@ void format_commit_message(struct commit *commit,
 		{ "%t" },	/* abbreviated tree hash */
 		{ "%P" },	/* parent hashes */
 		{ "%p" },	/* abbreviated parent hashes */
+		{ "%ds" },	/* description */
+		{ "%dn" },	/* tag name part of the description */
+		{ "%dd" },	/* depth part of the description */
 		{ "%an" },	/* author name */
 		{ "%ae" },	/* author email */
 		{ "%ad" },	/* author date */
@@ -809,6 +812,7 @@ void format_commit_message(struct commit *commit,
 		IHASH = 0, IHASH_ABBREV,
 		ITREE, ITREE_ABBREV,
 		IPARENTS, IPARENTS_ABBREV,
+		IDESC, IDESC_NAME, IDESC_DEPTH,
 		IAUTHOR_NAME, IAUTHOR_EMAIL,
 		IAUTHOR_DATE, IAUTHOR_DATE_RFC2822, IAUTHOR_DATE_RELATIVE,
 		IAUTHOR_TIMESTAMP, IAUTHOR_ISO8601,
@@ -829,10 +833,13 @@ void format_commit_message(struct commit *commit,
 	int i;
 	enum { HEADER, SUBJECT, BODY } state;
 	const char *msg = commit->buffer;
+	unsigned long occurs[ARRAY_SIZE(table)];
 
 	if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table))
 		die("invalid interp table!");
 
+	interp_count(occurs, format, table, ARRAY_SIZE(table));
+
 	/* these are independent of the commit */
 	interp_set_entry(table, IRED, "\033[31m");
 	interp_set_entry(table, IGREEN, "\033[32m");
@@ -875,6 +882,37 @@ void format_commit_message(struct commit *commit,
 				DEFAULT_ABBREV));
 	interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
 
+	if (occurs[IDESC] || occurs[IDESC_DEPTH] || occurs[IDESC_NAME]) {
+		struct strbuf desc;
+		char *name;
+		int depth = 0;
+		char *depthstr;
+		const char *abbr;
+
+		load_commit_names(2);
+		name = describe_commit(commit, 10, 0, &depth);
+		clear_commit_name_flags(commit);
+		abbr = find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV);
+
+		strbuf_init(&desc, 32);
+		strbuf_addstr(&desc, name ? name + 5 : abbr);
+		interp_set_entry(table, IDESC_NAME, desc.buf);
+		if (name) {
+			if (depth) {
+				strbuf_addch(&desc, '-');
+				depthstr = desc.buf + desc.len;
+				strbuf_addf(&desc, "%d", depth);
+				interp_set_entry(table, IDESC_DEPTH, depthstr);
+				strbuf_addstr(&desc, "-g");
+				strbuf_addstr(&desc, abbr);
+			} else {
+				interp_set_entry(table, IDESC_DEPTH, "0");
+			}
+		}
+		interp_set_entry(table, IDESC, desc.buf);
+		strbuf_release(&desc);
+	}
+
 	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
 		int eol;
 		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
diff --git a/interpolate.c b/interpolate.c
index 6ef53f2..8f51649 100644
--- a/interpolate.c
+++ b/interpolate.c
@@ -102,3 +102,39 @@ unsigned long interpolate(char *result, unsigned long reslen,
 		*dest = '\0';
 	return newlen;
 }
+
+/*
+ * interp_count - count occurences of placeholders
+ */
+void interp_count(unsigned long *result, const char *orig,
+                  const struct interp *interps, int ninterps)
+{
+	const char *src = orig;
+	const char *name;
+	unsigned long namelen;
+	int i;
+	char c;
+
+	for (i = 0; i < ninterps; i++)
+		result[i] = 0;
+
+	while ((c = *src)) {
+		if (c == '%') {
+			/* Try to match an interpolation string. */
+			for (i = 0; i < ninterps; i++) {
+				name = interps[i].name;
+				namelen = strlen(name);
+				if (strncmp(src, name, namelen) == 0)
+					break;
+			}
+
+			/* Check for valid interpolation. */
+			if (i < ninterps) {
+				result[i]++;
+				src += namelen;
+				continue;
+			}
+		}
+		src++;
+	}
+}
diff --git a/interpolate.h b/interpolate.h
index 77407e6..c41ea18 100644
--- a/interpolate.h
+++ b/interpolate.h
@@ -22,5 +22,7 @@ extern void interp_clear_table(struct interp *table, int ninterps);
 extern unsigned long interpolate(char *result, unsigned long reslen,
 				 const char *orig,
 				 const struct interp *interps, int ninterps);
+extern void interp_count(unsigned long *result, const char *orig,
+                         const struct interp *interps, int ninterps);
 
 #endif /* INTERPOLATE_H */
diff --git a/t/t6120-describe.sh b/t/t6120-describe.sh
index ae8ee11..3be43c4 100755
--- a/t/t6120-describe.sh
+++ b/t/t6120-describe.sh
@@ -23,6 +23,28 @@ check_describe () {
 		false ;;
 	esac
 	'
+
+	# %ds, %dn and %dd don't work with --tags or --all
+	case "$@" in
+	*--tags*|*--all*) return ;;
+	esac
+
+	R=$(git log -n 1 --pretty=format:%ds "$@") &&
+	test_expect_success "log --pretty=format:%ds $*" '
+	case "$R" in
+	$expect)	echo happy ;;
+	*)	echo "Oops - $R is not $expect";
+		false ;;
+	esac
+	'
+	R=$(git log -n 1 --pretty=format:%dn-%dd-g%h "$@" | sed 's/-0-g.*//') &&
+	test_expect_success "log --pretty=format:%dn-%dd-g%h $*" '
+	case "$R" in
+	$expect)	echo happy ;;
+	*)	echo "Oops - $R is not $expect";
+		false ;;
+	esac
+	'
 }
 
 test_expect_success setup '
-- 
1.5.3.5.529.ge3d6d

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 7/5] pretty describe: add min_prio parameter to describe_commit()
  2007-11-04 11:49 [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders René Scharfe
@ 2007-11-04 14:08 ` René Scharfe
  2007-11-04 14:11 ` [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders Johannes Schindelin
  1 sibling, 0 replies; 7+ messages in thread
From: René Scharfe @ 2007-11-04 14:08 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List

If load_commit_names() is called with a certain priority and later with
a higher priority, all the old, low-priority names are still kept and
describe_commit() will happily take them into account.  It can thus
report heads, even if the user asked for annotated tags.

In the current code, this can't happen, because builtin-describe.c calls
load_commit_names() only once, and commit.c always asks for the highest
priority (annotated tags).  This patch fixes the problem anyway, for the
benefit of future users.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
---

 builtin-describe.c |    6 ++++--
 cache.h            |    2 +-
 commit.c           |    2 +-
 describe.c         |    7 ++++---
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/builtin-describe.c b/builtin-describe.c
index fcd93f4..481d92f 100644
--- a/builtin-describe.c
+++ b/builtin-describe.c
@@ -14,6 +14,7 @@ static int all;	/* Default to annotated tags only */
 static int tags;	/* But allow any tags if --tags is specified */
 static int abbrev = DEFAULT_ABBREV;
 static int max_candidates = 10;
+static int min_prio;
 
 static void describe(const char *arg, int last_one)
 {
@@ -28,7 +29,7 @@ static void describe(const char *arg, int last_one)
 	if (!cmit)
 		die("%s is not a valid '%s' object", arg, commit_type);
 
-	name = describe_commit(cmit, max_candidates, debug, &depth);
+	name = describe_commit(cmit, max_candidates, min_prio, debug, &depth);
 	if (!name)
 		die("cannot describe '%s'", sha1_to_hex(cmit->object.sha1));
 
@@ -74,7 +75,8 @@ int cmd_describe(int argc, const char **argv, const char *prefix)
 	 * If --tags, then any tags are used.
 	 * Otherwise only annotated tags are used.
 	 */
-	load_commit_names(all ? 0 : (tags ? 1 : 2));
+	min_prio = all ? 0 : (tags ? 1 : 2);
+	load_commit_names(min_prio);
 
 	if (argc == 0) {
 		describe("HEAD", 1);
diff --git a/cache.h b/cache.h
index 84423a3..703d6a9 100644
--- a/cache.h
+++ b/cache.h
@@ -605,6 +605,6 @@ void shift_tree(const unsigned char *, const unsigned char *, unsigned char *, i
 /* describe.c */
 struct commit;
 extern void load_commit_names(int min_prio);
-extern char *describe_commit(struct commit *cmit, int max_candidates, int debug, int *depthp);
+extern char *describe_commit(struct commit *cmit, int max_candidates, int min_prio, int debug, int *depthp);
 
 #endif /* CACHE_H */
diff --git a/commit.c b/commit.c
index 9ff4735..624005b 100644
--- a/commit.c
+++ b/commit.c
@@ -897,7 +897,7 @@ void format_commit_message(struct commit *commit,
 		const char *abbr;
 
 		load_commit_names(2);
-		name = describe_commit(commit, 10, 0, &depth);
+		name = describe_commit(commit, 10, 2, 0, &depth);
 		clear_commit_name_flags(commit);
 		abbr = find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV);
 
diff --git a/describe.c b/describe.c
index 18c7abc..b6de4c1 100644
--- a/describe.c
+++ b/describe.c
@@ -102,7 +102,8 @@ static unsigned long finish_depth_computation(
 	return seen_commits;
 }
 
-char *describe_commit(struct commit *cmit, int max_candidates, int debug, int *depthp)
+char *describe_commit(struct commit *cmit, int max_candidates, int min_prio,
+                      int debug, int *depthp)
 {
 	struct commit *gave_up_on = NULL;
 	struct commit_list *list;
@@ -110,7 +111,7 @@ char *describe_commit(struct commit *cmit, int max_candidates, int debug, int *d
 	unsigned int match_cnt = 0, annotated_cnt = 0, cur_match;
 	unsigned long seen_commits = 0;
 
-	if (cmit->name) {
+	if (cmit->name && cmit->name_prio >= min_prio) {
 		*depthp = 0;
 		return cmit->name;
 	}
@@ -130,7 +131,7 @@ char *describe_commit(struct commit *cmit, int max_candidates, int debug, int *d
 		struct commit *c = pop_commit(&list);
 		struct commit_list *parents = c->parents;
 		seen_commits++;
-		if (c->name) {
+		if (c->name && c->name_prio >= min_prio) {
 			if (match_cnt < max_candidates) {
 				struct possible_tag *t = &all_matches[match_cnt++];
 				t->name = c->name;

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders
  2007-11-04 11:49 [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders René Scharfe
  2007-11-04 14:08 ` [PATCH 7/5] pretty describe: add min_prio parameter to describe_commit() René Scharfe
@ 2007-11-04 14:11 ` Johannes Schindelin
  2007-11-04 14:42   ` René Scharfe
  1 sibling, 1 reply; 7+ messages in thread
From: Johannes Schindelin @ 2007-11-04 14:11 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, Git Mailing List

Hi,

On Sun, 4 Nov 2007, Ren? Scharfe wrote:

> The new placeholders %ds (description string, git-describe style), %dn 
> (the name part) and %dd (the depth part) are added.
> 
> To avoid imposing the significant cost of running describe_commit() on 
> every format string, even if none of the new placeholders are used, a 
> new function, interp_count(), is introduced.  It is a stripped down 
> version of interpolate(), that simply counts the placeholders in a 
> format string.  If the describe placeholders are not found, setting up 
> the corresponding replacements is skipped.

The way I read this, those are two really quite independent patches 
squashed into one.

> +	unsigned long occurs[ARRAY_SIZE(table)];

You do not ever use the counts.  So, longs are overkill.  Even ints might 
be overkill, but probably the most convenient.  I would have gone with 
chars.  If I knew how to memset() an array of unsigned:1 entries to all 
zero, I would even have gone with that, but the runtime cost of this is 
probably higher than the chars.

But the even more fundamental problem is that you count the needed 
interpolations _every_ single time you output a commit message.

A much better place would be get_commit_format().  Yes that means 
restructuring the code a bit more, but I would say that this definitely 
would help.  My preference would even be introducing a new source file for 
the user format handling (commit-format.[ch]).

> +
> +/*
> + * interp_count - count occurences of placeholders
> + */
> +void interp_count(unsigned long *result, const char *orig,
> +                  const struct interp *interps, int ninterps)
> +{
> +	const char *src = orig;

You do not ever use orig again.  So why not just use that variable instead 
of introducing a new one?

> +	const char *name;
> +	unsigned long namelen;
> +	int i;
> +	char c;
> +
> +	for (i = 0; i < ninterps; i++)
> +		result[i] = 0;

memset()?

> +
> +	while ((c = *src)) {
> +		if (c == '%') {
> +			/* Try to match an interpolation string. */
> +			for (i = 0; i < ninterps; i++) {
> +				name = interps[i].name;
> +				namelen = strlen(name);
> +				if (strncmp(src, name, namelen) == 0)

prefixcmp()?

> +					break;
> +			}
> +
> +			/* Check for valid interpolation. */
> +			if (i < ninterps) {

This is ugly.  You already had a successful if() for that case earlier.

> +				result[i]++;
> +				src += namelen;
> +				continue;
> +			}
> +		}
> +		src++;
> +	}
> +}

I'd rewrite this whole loop as

	while ((c = *(orig++)))
		if (c == '%')
			/* Try to match an interpolation string. */
			for (i = 0; i < ninterps; i++)
				if (prefixcmp(orig, interps[i].name)) {
					result[i] = 1;
					orig += strlen(interps[i].name);
					break;
				}

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders
  2007-11-04 14:11 ` [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders Johannes Schindelin
@ 2007-11-04 14:42   ` René Scharfe
  2007-11-04 15:25     ` Johannes Schindelin
  0 siblings, 1 reply; 7+ messages in thread
From: René Scharfe @ 2007-11-04 14:42 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Junio C Hamano, Git Mailing List

Johannes Schindelin schrieb:
> Hi,
> 
> On Sun, 4 Nov 2007, Ren? Scharfe wrote:
> 
>> The new placeholders %ds (description string, git-describe style), %dn 
>> (the name part) and %dd (the depth part) are added.
>>
>> To avoid imposing the significant cost of running describe_commit() on 
>> every format string, even if none of the new placeholders are used, a 
>> new function, interp_count(), is introduced.  It is a stripped down 
>> version of interpolate(), that simply counts the placeholders in a 
>> format string.  If the describe placeholders are not found, setting up 
>> the corresponding replacements is skipped.
> 
> The way I read this, those are two really quite independent patches 
> squashed into one.

Busted!  I didn't want to introduce a performance regression with the
%ds parsing code, but I also didn't want to add a function without
users.  Patch 6 was then glued on as an afterthought -- the rest of the
series was ready when I saw Paul's mail.

So, a better way might be to move patch 6 to the head of the series and
introduce interp_count() in it, too.

>> +	unsigned long occurs[ARRAY_SIZE(table)];
> 
> You do not ever use the counts.  So, longs are overkill.  Even ints might 
> be overkill, but probably the most convenient.  I would have gone with 
> chars.  If I knew how to memset() an array of unsigned:1 entries to all 
> zero, I would even have gone with that, but the runtime cost of this is 
> probably higher than the chars.

Well, it isn't used in format_commit_message() currently, but it could
be.  Multiply the count and and the length of each substitution (minus
the length of the placeholder) and you get the number of bytes you need
to allocate.  interpolate() wouldn't need to be called twice anymore.

> But the even more fundamental problem is that you count the needed 
> interpolations _every_ single time you output a commit message.
> 
> A much better place would be get_commit_format().  Yes that means 
> restructuring the code a bit more, but I would say that this definitely 
> would help.  My preference would even be introducing a new source file for 
> the user format handling (commit-format.[ch]).

Counting the interpolations is easier than actually interpolating.
Wherever the code goes, the calls to interpolate() and interp_count()
should stay together.

>> +
>> +/*
>> + * interp_count - count occurences of placeholders
>> + */
>> +void interp_count(unsigned long *result, const char *orig,
>> +                  const struct interp *interps, int ninterps)
>> +{
>> +	const char *src = orig;
> 
> You do not ever use orig again.  So why not just use that variable instead 
> of introducing a new one?

I simply copied interpolate() and then chopped off the parts not needed
for counting, to make it easy to see that this is the smaller brother.

> 
>> +	const char *name;
>> +	unsigned long namelen;
>> +	int i;
>> +	char c;
>> +
>> +	for (i = 0; i < ninterps; i++)
>> +		result[i] = 0;
> 
> memset()?

In earlier versions there was a memset() there.  I replaced it to make
the intent even more clear, but I guess not using memset() is simply
superstition..

> 
>> +
>> +	while ((c = *src)) {
>> +		if (c == '%') {
>> +			/* Try to match an interpolation string. */
>> +			for (i = 0; i < ninterps; i++) {
>> +				name = interps[i].name;
>> +				namelen = strlen(name);
>> +				if (strncmp(src, name, namelen) == 0)
> 
> prefixcmp()?

Copied from interpolate()..

>> +					break;
>> +			}
>> +
>> +			/* Check for valid interpolation. */
>> +			if (i < ninterps) {
> 
> This is ugly.  You already had a successful if() for that case earlier.

Dito..

> 
>> +				result[i]++;
>> +				src += namelen;
>> +				continue;
>> +			}
>> +		}
>> +		src++;
>> +	}
>> +}
> 
> I'd rewrite this whole loop as
> 
> 	while ((c = *(orig++)))
> 		if (c == '%')
> 			/* Try to match an interpolation string. */
> 			for (i = 0; i < ninterps; i++)
> 				if (prefixcmp(orig, interps[i].name)) {
> 					result[i] = 1;
> 					orig += strlen(interps[i].name);
> 					break;
> 				}

Cleanups are sure possible, but they should be done on top, and to both
interpolate() and interp_count().  Let's first see how far we get with
dumb code-copying and reusing the result in new ways. :)

Thanks,
René

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders
  2007-11-04 14:42   ` René Scharfe
@ 2007-11-04 15:25     ` Johannes Schindelin
  2007-11-04 17:27       ` René Scharfe
  0 siblings, 1 reply; 7+ messages in thread
From: Johannes Schindelin @ 2007-11-04 15:25 UTC (permalink / raw)
  To: René Scharfe; +Cc: Junio C Hamano, Git Mailing List

Hi,

On Sun, 4 Nov 2007, Ren? Scharfe wrote:

> Johannes Schindelin schrieb:
> 
> > On Sun, 4 Nov 2007, Ren? Scharfe wrote:
> > 
> >> +	unsigned long occurs[ARRAY_SIZE(table)];
> > 
> > You do not ever use the counts.  So, longs are overkill.  Even ints 
> > might be overkill, but probably the most convenient.  I would have 
> > gone with chars.  If I knew how to memset() an array of unsigned:1 
> > entries to all zero, I would even have gone with that, but the runtime 
> > cost of this is probably higher than the chars.
> 
> Well, it isn't used in format_commit_message() currently, but it could 
> be.  Multiply the count and and the length of each substitution (minus 
> the length of the placeholder) and you get the number of bytes you need 
> to allocate.  interpolate() wouldn't need to be called twice anymore.

The better change, of course, would be to migrate interpolate() to strbuf.  
Then you do not have to play clever tricks anymore.

> > But the even more fundamental problem is that you count the needed 
> > interpolations _every_ single time you output a commit message.
> > 
> > A much better place would be get_commit_format().  Yes that means 
> > restructuring the code a bit more, but I would say that this definitely 
> > would help.  My preference would even be introducing a new source file for 
> > the user format handling (commit-format.[ch]).
> 
> Counting the interpolations is easier than actually interpolating. 
> Wherever the code goes, the calls to interpolate() and interp_count() 
> should stay together.

No.

The purpose of calling interp_count() is to know what placeholders have to 
be filled with substitutes.  As a consequence, the _logical_ thing to do 
is call interp_count() _once_.

It makes absolutely no sense to call the function over and over again, 
only to end up with the same result over and over again.

> >> +
> >> +/*
> >> + * interp_count - count occurences of placeholders
> >> + */
> >> +void interp_count(unsigned long *result, const char *orig,
> >> +                  const struct interp *interps, int ninterps)
> >> +{
> >> +	const char *src = orig;
> > 
> > You do not ever use orig again.  So why not just use that variable instead 
> > of introducing a new one?
> 
> I simply copied interpolate() and then chopped off the parts not needed
> for counting, to make it easy to see that this is the smaller brother.

It is not.  It does not do any substitution.  It is a pure helper for the 
process of filling the interpolation table.

> > I'd rewrite this whole loop as
> > 
> > 	while ((c = *(orig++)))
> > 		if (c == '%')
> > 			/* Try to match an interpolation string. */
> > 			for (i = 0; i < ninterps; i++)
> > 				if (prefixcmp(orig, interps[i].name)) {
> > 					result[i] = 1;
> > 					orig += strlen(interps[i].name);
> > 					break;
> > 				}
> 
> Cleanups are sure possible, but they should be done on top, and to both 
> interpolate() and interp_count().  Let's first see how far we get with 
> dumb code-copying and reusing the result in new ways. :)

Code copying is one of the primary sources for bad code.  Let's not even 
start.

IMHO there have to be _very_ good reasons to commit something that you 
plan to fix later anyway.

One such good reason would be that it is too hard to do in one go.  
Another good reason would be that you think the fix is not even needed 
(like I did when I wrote format: in the first place; I am quite surprised 
that after _that_ long a time people complain -- I'd have expected 
complaints right away or never).

In this case, I see no reason why we should go for suboptimal code first.

But hey, if you do not want to do it, I'll do it.  Just say so.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders
  2007-11-04 15:25     ` Johannes Schindelin
@ 2007-11-04 17:27       ` René Scharfe
  2007-11-05  1:20         ` René Scharfe
  0 siblings, 1 reply; 7+ messages in thread
From: René Scharfe @ 2007-11-04 17:27 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Junio C Hamano, Git Mailing List

Johannes Schindelin schrieb:
> Hi,
> 
> On Sun, 4 Nov 2007, Ren? Scharfe wrote:
> 
>> Johannes Schindelin schrieb:
>>
>>> On Sun, 4 Nov 2007, Ren? Scharfe wrote:
>>>
>>>> +	unsigned long occurs[ARRAY_SIZE(table)];
>>> You do not ever use the counts.  So, longs are overkill.  Even ints 
>>> might be overkill, but probably the most convenient.  I would have 
>>> gone with chars.  If I knew how to memset() an array of unsigned:1 
>>> entries to all zero, I would even have gone with that, but the runtime 
>>> cost of this is probably higher than the chars.
>> Well, it isn't used in format_commit_message() currently, but it could 
>> be.  Multiply the count and and the length of each substitution (minus 
>> the length of the placeholder) and you get the number of bytes you need 
>> to allocate.  interpolate() wouldn't need to be called twice anymore.
> 
> The better change, of course, would be to migrate interpolate() to strbuf.  
> Then you do not have to play clever tricks anymore.
>
>>> But the even more fundamental problem is that you count the needed 
>>> interpolations _every_ single time you output a commit message.
>>>
>>> A much better place would be get_commit_format().  Yes that means 
>>> restructuring the code a bit more, but I would say that this definitely 
>>> would help.  My preference would even be introducing a new source file for 
>>> the user format handling (commit-format.[ch]).
>> Counting the interpolations is easier than actually interpolating. 
>> Wherever the code goes, the calls to interpolate() and interp_count() 
>> should stay together.
> 
> No.
> 
> The purpose of calling interp_count() is to know what placeholders have to 
> be filled with substitutes.  As a consequence, the _logical_ thing to do 
> is call interp_count() _once_.
> 
> It makes absolutely no sense to call the function over and over again, 
> only to end up with the same result over and over again.

To allow this optimization, you need to make the (not yet filled)
interpolation table available to the new callsite of interp_count().
And you need to somehow pass the result of interp_count() from every
caller of it to the setup code in format_commit_message().

To see if it's worthwhile, I've just replaced the array "occurs" and the
call to interp_count() with a static array, and measured the runtime.
The speed difference was lost in the noise.

>>>> +
>>>> +/*
>>>> + * interp_count - count occurences of placeholders
>>>> + */
>>>> +void interp_count(unsigned long *result, const char *orig,
>>>> +                  const struct interp *interps, int ninterps)
>>>> +{
>>>> +	const char *src = orig;
>>> You do not ever use orig again.  So why not just use that variable instead 
>>> of introducing a new one?
>> I simply copied interpolate() and then chopped off the parts not needed
>> for counting, to make it easy to see that this is the smaller brother.
> 
> It is not.  It does not do any substitution.  It is a pure helper for the 
> process of filling the interpolation table.

Sure.  It's important, though, that it reports the same number of
substitutions as interpolate() later actually performs.  Correctness
trumps cleanliness, and its easier to check that a copy is correct, even
if certain pieces are missing.

>>> I'd rewrite this whole loop as
>>>
>>> 	while ((c = *(orig++)))
>>> 		if (c == '%')
>>> 			/* Try to match an interpolation string. */
>>> 			for (i = 0; i < ninterps; i++)
>>> 				if (prefixcmp(orig, interps[i].name)) {
>>> 					result[i] = 1;
>>> 					orig += strlen(interps[i].name);
>>> 					break;
>>> 				}
>> Cleanups are sure possible, but they should be done on top, and to both 
>> interpolate() and interp_count().  Let's first see how far we get with 
>> dumb code-copying and reusing the result in new ways. :)
> 
> Code copying is one of the primary sources for bad code.  Let's not even 
> start.
> 
> IMHO there have to be _very_ good reasons to commit something that you 
> plan to fix later anyway.

Code copying can be bad if one copies bugs.  But code copying allows a
strange feat: new code can inherit maturity.  If you copy known good
code and then change it in trivial ways (keeping the structure etc.) to
make it do new things, then the chance of a bug creeping in is lower
than if you wrote that piece of code anew.

> One such good reason would be that it is too hard to do in one go.  
> Another good reason would be that you think the fix is not even needed 
> (like I did when I wrote format: in the first place; I am quite surprised 
> that after _that_ long a time people complain -- I'd have expected 
> complaints right away or never).

Not everybody is as fast as you, Dscho. ;-)

Another idea that I was kicking around, but didn't get time to
implement: a performance regression test suite, i.e. make test for
timings and memory usages.

> In this case, I see no reason why we should go for suboptimal code first.
> 
> But hey, if you do not want to do it, I'll do it.  Just say so.

Busted again!  I wanted to see if someone else would pick up the
janitorial work for me. :-)

In any case, interpolate.c needs some attention, with or without my
patch.  I agree that a native strbuf version would be nice.  How about
an interface like that:

	typedef const char *(*expand_fn_t)
		(const char *placeholder, void *context);
	void strbuf_addexpand(struct strbuf *sb, const char *format,
	                      const char **placeholders,
	                      expand_fn_t fn, void *context);

strbuf_addexpand() would call fn() when it recognizes a placeholder,
avoiding unneeded setup code.  It could cache the result, so that fn()
gets called at most a single time for each given placeholder.  context
would be passed through to fn(), e.g. a struct commit in case of
format_commit_message().  Makes sense?

Thanks,
René

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders
  2007-11-04 17:27       ` René Scharfe
@ 2007-11-05  1:20         ` René Scharfe
  0 siblings, 0 replies; 7+ messages in thread
From: René Scharfe @ 2007-11-05  1:20 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Junio C Hamano, Git Mailing List

René Scharfe schrieb:
> In any case, interpolate.c needs some attention, with or without my
> patch.  I agree that a native strbuf version would be nice.  How about
> an interface like that:
> 
> 	typedef const char *(*expand_fn_t)
> 		(const char *placeholder, void *context);
> 	void strbuf_addexpand(struct strbuf *sb, const char *format,
> 	                      const char **placeholders,
> 	                      expand_fn_t fn, void *context);
> 
> strbuf_addexpand() would call fn() when it recognizes a placeholder,
> avoiding unneeded setup code.  It could cache the result, so that fn()
> gets called at most a single time for each given placeholder.  context
> would be passed through to fn(), e.g. a struct commit in case of
> format_commit_message().  Makes sense?

OK, for discussion, I've experimented a bit, and here's what I could
come up with.  This version doesn't do any caching.  Is it even needed?

Anyway, strbuf_expand() is really simple, and the callback is expected
to add it's output directly to the strbuf.  That means that the number
of strdups and memdupz is reduced significantly, speeding up the whole
thing.  Converting code to use strbuf_expand is easy, but not trivial
(i.e. not doable by a script).

René


 commit.c |  275 ++++++++++++++++++++++++++++++++++---------------------------
 strbuf.c |   22 +++++
 strbuf.h |    3 +
 3 files changed, 178 insertions(+), 122 deletions(-)

diff --git a/commit.c b/commit.c
index 8262f6a..4a47b0e 100644
--- a/commit.c
+++ b/commit.c
@@ -698,7 +698,8 @@ static char *logmsg_reencode(const struct commit *commit,
 	return out;
 }
 
-static void fill_person(struct interp *table, const char *msg, int len)
+static void format_person_part(struct strbuf *sb, char part,
+                               const char *msg, int len)
 {
 	int start, end, tz = 0;
 	unsigned long date;
@@ -710,7 +711,10 @@ static void fill_person(struct interp *table, const char *msg, int len)
 	start = end + 1;
 	while (end > 0 && isspace(msg[end - 1]))
 		end--;
-	table[0].value = xmemdupz(msg, end);
+	if (part == 'n') {	/* name */
+		strbuf_add(sb, msg, end);
+		return;
+	}
 
 	if (start >= len)
 		return;
@@ -722,7 +726,10 @@ static void fill_person(struct interp *table, const char *msg, int len)
 	if (end >= len)
 		return;
 
-	table[1].value = xmemdupz(msg + start, end - start);
+	if (part == 'e') {	/* email */
+		strbuf_add(sb, msg + start, end - start);
+		return;
+	}
 
 	/* parse date */
 	for (start = end + 1; start < len && isspace(msg[start]); start++)
@@ -733,7 +740,10 @@ static void fill_person(struct interp *table, const char *msg, int len)
 	if (msg + start == ep)
 		return;
 
-	table[5].value = xmemdupz(msg + start, ep - (msg + start));
+	if (part == 't') {	/* date, UNIX timestamp */
+		strbuf_add(sb, msg + start, ep - (msg + start));
+		return;
+	}
 
 	/* parse tz */
 	for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
@@ -744,123 +754,107 @@ static void fill_person(struct interp *table, const char *msg, int len)
 			tz = -tz;
 	}
 
-	interp_set_entry(table, 2, show_date(date, tz, DATE_NORMAL));
-	interp_set_entry(table, 3, show_date(date, tz, DATE_RFC2822));
-	interp_set_entry(table, 4, show_date(date, tz, DATE_RELATIVE));
-	interp_set_entry(table, 6, show_date(date, tz, DATE_ISO8601));
+	switch (part) {
+	case 'd':	/* date */
+		strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
+		return;
+	case 'D':	/* date, RFC2822 style */
+		strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
+		return;
+	case 'r':	/* date, relative */
+		strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
+		return;
+	case 'i':	/* date, ISO 8601 */
+		strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
+		return;
+	}
 }
 
-void format_commit_message(const struct commit *commit,
-                           const void *format, struct strbuf *sb)
+static void format_commit_item(struct strbuf *sb, const char *placeholder,
+                               void *context)
 {
-	struct interp table[] = {
-		{ "%H" },	/* commit hash */
-		{ "%h" },	/* abbreviated commit hash */
-		{ "%T" },	/* tree hash */
-		{ "%t" },	/* abbreviated tree hash */
-		{ "%P" },	/* parent hashes */
-		{ "%p" },	/* abbreviated parent hashes */
-		{ "%an" },	/* author name */
-		{ "%ae" },	/* author email */
-		{ "%ad" },	/* author date */
-		{ "%aD" },	/* author date, RFC2822 style */
-		{ "%ar" },	/* author date, relative */
-		{ "%at" },	/* author date, UNIX timestamp */
-		{ "%ai" },	/* author date, ISO 8601 */
-		{ "%cn" },	/* committer name */
-		{ "%ce" },	/* committer email */
-		{ "%cd" },	/* committer date */
-		{ "%cD" },	/* committer date, RFC2822 style */
-		{ "%cr" },	/* committer date, relative */
-		{ "%ct" },	/* committer date, UNIX timestamp */
-		{ "%ci" },	/* committer date, ISO 8601 */
-		{ "%e" },	/* encoding */
-		{ "%s" },	/* subject */
-		{ "%b" },	/* body */
-		{ "%Cred" },	/* red */
-		{ "%Cgreen" },	/* green */
-		{ "%Cblue" },	/* blue */
-		{ "%Creset" },	/* reset color */
-		{ "%n" },	/* newline */
-		{ "%m" },	/* left/right/bottom */
-	};
-	enum interp_index {
-		IHASH = 0, IHASH_ABBREV,
-		ITREE, ITREE_ABBREV,
-		IPARENTS, IPARENTS_ABBREV,
-		IAUTHOR_NAME, IAUTHOR_EMAIL,
-		IAUTHOR_DATE, IAUTHOR_DATE_RFC2822, IAUTHOR_DATE_RELATIVE,
-		IAUTHOR_TIMESTAMP, IAUTHOR_ISO8601,
-		ICOMMITTER_NAME, ICOMMITTER_EMAIL,
-		ICOMMITTER_DATE, ICOMMITTER_DATE_RFC2822,
-		ICOMMITTER_DATE_RELATIVE, ICOMMITTER_TIMESTAMP,
-		ICOMMITTER_ISO8601,
-		IENCODING,
-		ISUBJECT,
-		IBODY,
-		IRED, IGREEN, IBLUE, IRESET_COLOR,
-		INEWLINE,
-		ILEFT_RIGHT,
-	};
+	struct commit *commit = context;
 	struct commit_list *p;
-	char parents[1024];
-	unsigned long len;
 	int i;
 	enum { HEADER, SUBJECT, BODY } state;
 	const char *msg = commit->buffer;
 
-	if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table))
-		die("invalid interp table!");
-
 	/* these are independent of the commit */
-	interp_set_entry(table, IRED, "\033[31m");
-	interp_set_entry(table, IGREEN, "\033[32m");
-	interp_set_entry(table, IBLUE, "\033[34m");
-	interp_set_entry(table, IRESET_COLOR, "\033[m");
-	interp_set_entry(table, INEWLINE, "\n");
+	switch (placeholder[0]) {
+	case 'C':
+		switch (placeholder[3]) {
+		case 'd':	/* red */
+			strbuf_addstr(sb, "\033[31m");
+			return;
+		case 'e':	/* green */
+			strbuf_addstr(sb, "\033[32m");
+			return;
+		case 'u':	/* blue */
+			strbuf_addstr(sb, "\033[34m");
+			return;
+		case 's':	/* reset color */
+			strbuf_addstr(sb, "\033[m");
+			return;
+		}
+	case 'n':		/* newline */
+		strbuf_addch(sb, '\n');
+		return;
+	}
 
 	/* these depend on the commit */
 	if (!commit->object.parsed)
 		parse_object(commit->object.sha1);
-	interp_set_entry(table, IHASH, sha1_to_hex(commit->object.sha1));
-	interp_set_entry(table, IHASH_ABBREV,
-			find_unique_abbrev(commit->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, ITREE, sha1_to_hex(commit->tree->object.sha1));
-	interp_set_entry(table, ITREE_ABBREV,
-			find_unique_abbrev(commit->tree->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, ILEFT_RIGHT,
-			 (commit->object.flags & BOUNDARY)
-			 ? "-"
-			 : (commit->object.flags & SYMMETRIC_LEFT)
-			 ? "<"
-			 : ">");
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			sha1_to_hex(p->item->object.sha1));
-	interp_set_entry(table, IPARENTS, parents + 1);
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			find_unique_abbrev(p->item->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
 
+	switch (placeholder[0]) {
+	case 'H':		/* commit hash */
+		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
+		return;
+	case 'h':		/* abbreviated commit hash */
+		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
+		                                     DEFAULT_ABBREV));
+		return;
+	case 'T':		/* tree hash */
+		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
+		return;
+	case 't':		/* abbreviated tree hash */
+		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
+		                                     DEFAULT_ABBREV));
+		return;
+	case 'P':		/* parent hashes */
+		for (p = commit->parents; p; p = p->next) {
+			if (p != commit->parents)
+				strbuf_addch(sb, ' ');
+			strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
+		}
+		return;
+	case 'p':		/* abbreviated parent hashes */
+		for (p = commit->parents; p; p = p->next) {
+			if (p != commit->parents)
+				strbuf_addch(sb, ' ');
+			strbuf_addstr(sb, find_unique_abbrev(
+					p->item->object.sha1, DEFAULT_ABBREV));
+		}
+		return;
+	case 'm':		/* left/right/bottom */
+		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
+		                 ? '-'
+		                 : (commit->object.flags & SYMMETRIC_LEFT)
+		                 ? '<'
+		                 : '>');
+		return;
+	}
+
+	/* For the rest we have to parse the commit header. */
 	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
 		int eol;
 		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
 			; /* do nothing */
 
 		if (state == SUBJECT) {
-			table[ISUBJECT].value = xmemdupz(msg + i, eol - i);
+			if (placeholder[0] == 's') {
+				strbuf_add(sb, msg + i, eol - i);
+				return;
+			}
 			i = eol;
 		}
 		if (i == eol) {
@@ -868,29 +862,66 @@ void format_commit_message(const struct commit *commit,
 			/* strip empty lines */
 			while (msg[eol + 1] == '\n')
 				eol++;
-		} else if (!prefixcmp(msg + i, "author "))
-			fill_person(table + IAUTHOR_NAME,
-					msg + i + 7, eol - i - 7);
-		else if (!prefixcmp(msg + i, "committer "))
-			fill_person(table + ICOMMITTER_NAME,
-					msg + i + 10, eol - i - 10);
-		else if (!prefixcmp(msg + i, "encoding "))
-			table[IENCODING].value =
-				xmemdupz(msg + i + 9, eol - i - 9);
+		} else if (!prefixcmp(msg + i, "author ")) {
+			if (placeholder[0] == 'a') {
+				format_person_part(sb, placeholder[1],
+				                   msg + i + 7, eol - i - 7);
+				return;
+			}
+		} else if (!prefixcmp(msg + i, "committer ")) {
+			if (placeholder[0] == 'c') {
+				format_person_part(sb, placeholder[1],
+				                   msg + i + 10, eol - i - 10);
+				return;
+			}
+		} else if (!prefixcmp(msg + i, "encoding ")) {
+			if (placeholder[0] == 'e') {
+				strbuf_add(sb, msg + i + 9, eol - i - 9);
+				return;
+			}
+		}
 		i = eol;
 	}
-	if (msg[i])
-		table[IBODY].value = xstrdup(msg + i);
-
-	len = interpolate(sb->buf + sb->len, strbuf_avail(sb),
-				format, table, ARRAY_SIZE(table));
-	if (len > strbuf_avail(sb)) {
-		strbuf_grow(sb, len);
-		interpolate(sb->buf + sb->len, strbuf_avail(sb) + 1,
-					format, table, ARRAY_SIZE(table));
-	}
-	strbuf_setlen(sb, sb->len + len);
-	interp_clear_table(table, ARRAY_SIZE(table));
+	if (msg[i] && placeholder[0] == 'b')	/* body */
+		strbuf_addstr(sb, msg + i);
+}
+
+void format_commit_message(const struct commit *commit,
+                           const void *format, struct strbuf *sb)
+{
+	const char *placeholders[] = {
+		"H",		/* commit hash */
+		"h",		/* abbreviated commit hash */
+		"T",		/* tree hash */
+		"t",		/* abbreviated tree hash */
+		"P",		/* parent hashes */
+		"p",		/* abbreviated parent hashes */
+		"an",		/* author name */
+		"ae",		/* author email */
+		"ad",		/* author date */
+		"aD",		/* author date, RFC2822 style */
+		"ar",		/* author date, relative */
+		"at",		/* author date, UNIX timestamp */
+		"ai",		/* author date, ISO 8601 */
+		"cn",		/* committer name */
+		"ce",		/* committer email */
+		"cd",		/* committer date */
+		"cD",		/* committer date, RFC2822 style */
+		"cr",		/* committer date, relative */
+		"ct",		/* committer date, UNIX timestamp */
+		"ci",		/* committer date, ISO 8601 */
+		"e",		/* encoding */
+		"s",		/* subject */
+		"b",		/* body */
+		"Cred",		/* red */
+		"Cgreen",	/* green */
+		"Cblue",	/* blue */
+		"Creset",	/* reset color */
+		"n",		/* newline */
+		"m",		/* left/right/bottom */
+		NULL
+	};
+	strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit);
 }
 
 static void pp_header(enum cmit_fmt fmt,
diff --git a/strbuf.c b/strbuf.c
index f4201e1..b71da99 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -129,6 +129,28 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
 	strbuf_setlen(sb, sb->len + len);
 }
 
+void strbuf_expand(struct strbuf *sb, const char *fmt,
+                   const char **placeholders, expand_fn_t fn, void *context)
+{
+	char c;
+	const char **p;
+
+	while ((c = *fmt++)) {
+		if (c != '%') {
+			strbuf_addch(sb, c);
+			continue;
+		}
+
+		for (p = placeholders; *p; p++) {
+			if (!prefixcmp(fmt, *p)) {
+				fn(sb, *p, context);
+				fmt += strlen(*p);
+				break;
+			}
+		}
+	}
+}
+
 size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
 {
 	size_t res;
diff --git a/strbuf.h b/strbuf.h
index 9b9e861..b9c3e79 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -102,6 +102,9 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
 	strbuf_add(sb, sb2->buf, sb2->len);
 }
 
+typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *fmt, const char **placeholders, expand_fn_t fn, void *context);
+
 __attribute__((format(printf,2,3)))
 extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2007-11-05  1:21 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-04 11:49 [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders René Scharfe
2007-11-04 14:08 ` [PATCH 7/5] pretty describe: add min_prio parameter to describe_commit() René Scharfe
2007-11-04 14:11 ` [PATCH 5/5] pretty describe: add %ds, %dn, %dd placeholders Johannes Schindelin
2007-11-04 14:42   ` René Scharfe
2007-11-04 15:25     ` Johannes Schindelin
2007-11-04 17:27       ` René Scharfe
2007-11-05  1:20         ` René Scharfe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).