git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [JGIT PATCH 1/2] Add getEncoding() to RevCommit to discover the encoding
@ 2009-06-24  3:11 Shawn O. Pearce
  2009-06-24  3:11 ` [JGIT PATCH 2/2] Add parsing support for Signed-off-by lines in commit messages Shawn O. Pearce
  0 siblings, 1 reply; 3+ messages in thread
From: Shawn O. Pearce @ 2009-06-24  3:11 UTC (permalink / raw)
  To: Robin Rosenberg; +Cc: git

If an application needs to parse the raw buffer by hand it might
benefit from knowing the encoding of the commit.  We can make it
available to them through a getEncoding() method, using the same
logic we already use for getFullMessage() and getShortMessage(),
but this is still only an estimate based on the "encoding" header
and may not be reality if the commit is horribly malformed.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 .../spearce/jgit/revwalk/RevCommitParseTest.java   |    6 ++++++
 .../src/org/spearce/jgit/revwalk/RevCommit.java    |   16 ++++++++++++++++
 2 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/org.spearce.jgit.test/tst/org/spearce/jgit/revwalk/RevCommitParseTest.java b/org.spearce.jgit.test/tst/org/spearce/jgit/revwalk/RevCommitParseTest.java
index 9b95924..62a4ab5 100644
--- a/org.spearce.jgit.test/tst/org/spearce/jgit/revwalk/RevCommitParseTest.java
+++ b/org.spearce.jgit.test/tst/org/spearce/jgit/revwalk/RevCommitParseTest.java
@@ -39,6 +39,7 @@
 
 import java.io.ByteArrayOutputStream;
 
+import org.spearce.jgit.lib.Constants;
 import org.spearce.jgit.lib.ObjectId;
 import org.spearce.jgit.lib.PersonIdent;
 import org.spearce.jgit.lib.RepositoryTestCase;
@@ -145,6 +146,7 @@ public void testParse_implicit_UTF8_encoded() throws Exception {
 		c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
 		c.parseCanonical(new RevWalk(db), b.toByteArray());
 
+		assertSame(Constants.CHARSET, c.getEncoding());
 		assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
 		assertEquals("Sm\u00f6rg\u00e5sbord", c.getShortMessage());
 		assertEquals("Sm\u00f6rg\u00e5sbord\n\n\u304d\u308c\u3044\n", c.getFullMessage());
@@ -163,6 +165,7 @@ public void testParse_implicit_mixed_encoded() throws Exception {
 		c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
 		c.parseCanonical(new RevWalk(db), b.toByteArray());
 
+		assertSame(Constants.CHARSET, c.getEncoding());
 		assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
 		assertEquals("Sm\u00f6rg\u00e5sbord", c.getShortMessage());
 		assertEquals("Sm\u00f6rg\u00e5sbord\n\n\u304d\u308c\u3044\n", c.getFullMessage());
@@ -187,6 +190,7 @@ public void testParse_explicit_encoded() throws Exception {
 		c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
 		c.parseCanonical(new RevWalk(db), b.toByteArray());
 
+		assertEquals("EUC-JP", c.getEncoding().name());
 		assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
 		assertEquals("\u304d\u308c\u3044", c.getShortMessage());
 		assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage());
@@ -215,6 +219,7 @@ public void testParse_explicit_bad_encoded() throws Exception {
 		c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
 		c.parseCanonical(new RevWalk(db), b.toByteArray());
 
+		assertEquals("EUC-JP", c.getEncoding().name());
 		assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
 		assertEquals("\u304d\u308c\u3044", c.getShortMessage());
 		assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage());
@@ -244,6 +249,7 @@ public void testParse_explicit_bad_encoded2() throws Exception {
 		c = new RevCommit(id("9473095c4cb2f12aefe1db8a355fe3fafba42f67")); // bogus id
 		c.parseCanonical(new RevWalk(db), b.toByteArray());
 
+		assertEquals("ISO-8859-1", c.getEncoding().name());
 		assertEquals("F\u00f6r fattare", c.getAuthorIdent().getName());
 		assertEquals("\u304d\u308c\u3044", c.getShortMessage());
 		assertEquals("\u304d\u308c\u3044\n\nHi\n", c.getFullMessage());
diff --git a/org.spearce.jgit/src/org/spearce/jgit/revwalk/RevCommit.java b/org.spearce.jgit/src/org/spearce/jgit/revwalk/RevCommit.java
index f211dfd..284a183 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/revwalk/RevCommit.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/revwalk/RevCommit.java
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2009, Google Inc.
  * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
  *
  * All rights reserved.
@@ -377,6 +378,21 @@ static boolean hasLF(final byte[] r, int b, final int e) {
 	}
 
 	/**
+	 * Determine the encoding of the commit message buffer.
+	 * <p>
+	 * Locates the "encoding" header (if present) and then returns the proper
+	 * character set to apply to this buffer to evaluate its contents as
+	 * character data.
+	 * <p>
+	 * If no encoding header is present, {@link Constants#CHARSET} is assumed.
+	 *
+	 * @return the preferred encoding of {@link #getRawBuffer()}.
+	 */
+	public final Charset getEncoding() {
+		return RawParseUtils.parseEncoding(buffer);
+	}
+
+	/**
 	 * Reset this commit to allow another RevWalk with the same instances.
 	 * <p>
 	 * Subclasses <b>must</b> call <code>super.reset()</code> to ensure the
-- 
1.6.3.3.420.gd4b46

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-06-24 17:30 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-24  3:11 [JGIT PATCH 1/2] Add getEncoding() to RevCommit to discover the encoding Shawn O. Pearce
2009-06-24  3:11 ` [JGIT PATCH 2/2] Add parsing support for Signed-off-by lines in commit messages Shawn O. Pearce
2009-06-24 17:30   ` [JGIT PATCH 3/2] Support extracting emails from Signed-off-by lines Shawn O. Pearce

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).