All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>, "Jeff King" <peff@peff.net>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 6/9] pretty: two phase conversion for non utf-8 commits
Date: Sun, 23 Sep 2012 16:10:30 +0700	[thread overview]
Message-ID: <1348391433-11300-7-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1348391433-11300-1-git-send-email-pclouds@gmail.com>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 5420 bytes --]

Always assume format_commit_item() takes an utf-8 string for
simplicity. If commit message is in non-utf8, or output encoding is
not, then the commit is first converted to utf-8, processed, then
output converted to output encoding.

This of course only works with encodings that are compatible with
Unicode.

This also fixes the iso8859-1 test. It's supposed to create an
iso8859-1 commit, but the commit content in t6006-rev-list-format.sh
is in UTF-8. Split the content out in a separate file (so its encoding
won't accidentally be converted) and convert it back to iso8859-1.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 pretty.c                     | 29 +++++++++++++++++++----------
 t/t6006-rev-list-format.sh   | 16 +++++-----------
 t/t6006/commit-msg.iso8859-1 |  5 +++++
 3 files changed, 29 insertions(+), 21 deletions(-)
 create mode 100644 t/t6006/commit-msg.iso8859-1

diff --git a/pretty.c b/pretty.c
index 45fe878..f3275a7 100644
--- a/pretty.c
+++ b/pretty.c
@@ -916,7 +916,8 @@ static size_t parse_color_placeholder(struct strbuf *sb,
 	return 0;
 }
 
-static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */
+				const char *placeholder,
 				void *context)
 {
 	struct format_commit_context *c = context;
@@ -1121,7 +1122,8 @@ static size_t format_commit_one(struct strbuf *sb, const char *placeholder,
 	return 0;	/* unknown placeholder */
 }
 
-static size_t format_commit_item(struct strbuf *sb, const char *placeholder,
+static size_t format_commit_item(struct strbuf *sb, /* in UTF-8 */
+				 const char *placeholder,
 				 void *context)
 {
 	struct format_commit_context *c = context;
@@ -1205,25 +1207,32 @@ void format_commit_message(const struct commit *commit,
 	struct format_commit_context context;
 	static const char utf8[] = "UTF-8";
 	const char *output_enc = pretty_ctx->output_encoding;
+	char *enc;
 
 	memset(&context, 0, sizeof(context));
 	context.commit = commit;
 	context.pretty_ctx = pretty_ctx;
 	context.wrap_start = sb->len;
 	context.message = commit->buffer;
-	if (output_enc) {
-		char *enc = get_header(commit, "encoding");
-		if (strcmp(enc ? enc : utf8, output_enc)) {
-			context.message = logmsg_reencode(commit, output_enc);
-			if (!context.message)
-				context.message = commit->buffer;
-		}
-		free(enc);
+	enc = get_header(commit, "encoding");
+	if (enc && strcmp(utf8, enc)) {
+		context.message = reencode_string(context.message, utf8, enc);
+		if (!context.message)
+			context.message = commit->buffer;
 	}
+	free(enc);
 
 	strbuf_expand(sb, format, format_commit_item, &context);
 	rewrap_message_tail(sb, &context, 0, 0, 0);
 
+	if (output_enc && strcmp(utf8, output_enc)) {
+		char *out = reencode_string(sb->buf, output_enc, utf8);
+		if (out) {
+			int len = strlen(out);
+			strbuf_attach(sb, out, len, len + 1);
+		}
+	}
+
 	if (context.message != commit->buffer)
 		free(context.message);
 	free(context.signature.gpg_output);
diff --git a/t/t6006-rev-list-format.sh b/t/t6006-rev-list-format.sh
index f94f0c4..cd24839 100755
--- a/t/t6006-rev-list-format.sh
+++ b/t/t6006-rev-list-format.sh
@@ -124,27 +124,21 @@ commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
 ^[[1;31;43mfoo^[[m
 EOF
 
-cat >commit-msg <<'EOF'
-Test printing of complex bodies
-
-This commit message is much longer than the others,
-and it will be encoded in iso8859-1. We should therefore
-include an iso8859 character: ¡bueno!
-EOF
+cat "$TEST_DIRECTORY/t6006/commit-msg.iso8859-1" >commit-msg
 test_expect_success 'setup complex body' '
 git config i18n.commitencoding iso8859-1 &&
   echo change2 >foo && git commit -a -F commit-msg
 '
 
 test_format complex-encoding %e <<'EOF'
-commit f58db70b055c5718631e5c61528b28b12090cdea
+commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
 iso8859-1
 commit 131a310eb913d107dd3c09a65d1651175898735d
 commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
 EOF
 
 test_format complex-subject %s <<'EOF'
-commit f58db70b055c5718631e5c61528b28b12090cdea
+commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
 Test printing of complex bodies
 commit 131a310eb913d107dd3c09a65d1651175898735d
 changed foo
@@ -153,7 +147,7 @@ added foo
 EOF
 
 test_format complex-body %b <<'EOF'
-commit f58db70b055c5718631e5c61528b28b12090cdea
+commit 1ed88da4a5b5ed8c449114ac131efc62178734c3
 This commit message is much longer than the others,
 and it will be encoded in iso8859-1. We should therefore
 include an iso8859 character: ¡bueno!
@@ -163,7 +157,7 @@ commit 86c75cfd708a0e5868dc876ed5b8bb66c80b4873
 EOF
 
 test_expect_success '%x00 shows NUL' '
-	echo  >expect commit f58db70b055c5718631e5c61528b28b12090cdea &&
+	echo  >expect commit 1ed88da4a5b5ed8c449114ac131efc62178734c3 &&
 	echo >>expect fooQbar &&
 	git rev-list -1 --format=foo%x00bar HEAD >actual.nul &&
 	nul_to_q <actual.nul >actual &&
diff --git a/t/t6006/commit-msg.iso8859-1 b/t/t6006/commit-msg.iso8859-1
new file mode 100644
index 0000000..f8fe808
--- /dev/null
+++ b/t/t6006/commit-msg.iso8859-1
@@ -0,0 +1,5 @@
+Test printing of complex bodies
+
+This commit message is much longer than the others,
+and it will be encoded in iso8859-1. We should therefore
+include an iso8859 character: ¡bueno!
-- 
1.7.12.1.406.g6ab07c4

  parent reply	other threads:[~2012-09-23  9:18 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-22  4:22 [PATCH 0/6] Advanced --oneline layout Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 1/6] pretty: share code between format_decoration and show_decorations Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 2/6] pretty: split parsing %C into a separate function Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 3/6] pretty: support %C(auto[,N]) to turn on coloring on next placeholder(s) Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 4/6] utf8.c: move display_mode_esc_sequence_len() for use by other functions Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 5/6] utf8.c: add utf8_strnwidth() with the ability to skip ansi sequences Nguyễn Thái Ngọc Duy
2012-09-22  4:22 ` [PATCH 6/6] pretty: support padding placeholders, %< %> and %<> Nguyễn Thái Ngọc Duy
2012-09-22  4:26 ` [PATCH 7/6] pretty: trim trailing spaces due to padding Nguyễn Thái Ngọc Duy
2012-09-23  9:10 ` [PATCH v2 0/9] Advanced --oneline layout Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 1/9] pretty: share code between format_decoration and show_decorations Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 2/9] pretty: split parsing %C into a separate function Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 3/9] pretty: support %C(auto[,N]) to turn on coloring on next placeholder(s) Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 4/9] utf8.c: move display_mode_esc_sequence_len() for use by other functions Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 5/9] utf8.c: add utf8_strnwidth() with the ability to skip ansi sequences Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` Nguyễn Thái Ngọc Duy [this message]
2012-09-23 13:54     ` [PATCH 6/9] pretty: two phase conversion for non utf-8 commits Robin Rosenberg
2012-09-24  1:21       ` Nguyen Thai Ngoc Duy
2012-09-23  9:10   ` [PATCH 7/9] pretty: support padding placeholders, %< %> and %>< Nguyễn Thái Ngọc Duy
2012-10-24  8:25     ` Jeff King
2012-09-23  9:10   ` [PATCH 8/9] pretty: support truncating in %>, %< " Nguyễn Thái Ngọc Duy
2012-09-23  9:10   ` [PATCH 9/9] pretty: support %>> that steal trailing spaces Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1348391433-11300-7-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.