From: Robin Rosenberg <robin.rosenberg.lists@dewire.com>
To: "Shawn O. Pearce" <spearce@spearce.org>
Cc: git@vger.kernel.org
Subject: Re: [JGIT PATCH v2] Encode/decode index and tree entries using UTF-8
Date: Sun, 19 Oct 2008 20:24:24 +0200 [thread overview]
Message-ID: <200810192024.24466.robin.rosenberg.lists@dewire.com> (raw)
In-Reply-To: <20081019171456.GC14786@spearce.org>
Decoding uses the same strategy as for commit messages and other string
entities. Encoding is always done in UTF-8. This is incompatible with
Git for non-unicode unices, but it leads to the expected behavior on
Windows and cross-locale sharing of repositories.
Signed-off-by: Robin Rosenberg <robin.rosenberg@dewire.com>
---
söndagen den 19 oktober 2008 19.14.56 skrev Shawn O. Pearce:
> Robin Rosenberg <robin.rosenberg.lists@dewire.com> wrote:
> > Decoding uses the same strategy as for commit messages and other string
> > entities. Encoding is always done in UTF-8. This is incompatible with
> > Git for non-unicode unices, but it leads to the expected behavior on
> > Windows and cross-locale sharing of repositories.
>
> FWIW I think this is a good idea.
Ok, so here's the update. We might want to move the encode out of Constants
too as it is no longer a utility for constants.
-- robin
.../src/org/spearce/jgit/lib/GitIndex.java | 27 ++++++++--------
.../src/org/spearce/jgit/lib/Tree.java | 16 ++++-----
.../src/org/spearce/jgit/lib/TreeEntry.java | 14 +++-----
.../src/org/spearce/jgit/util/RawParseUtils.java | 32 ++++++++++++++++++++
4 files changed, 58 insertions(+), 31 deletions(-)
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
index 22935ab..bafddef 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/GitIndex.java
@@ -63,6 +63,7 @@
import org.spearce.jgit.errors.CorruptObjectException;
import org.spearce.jgit.errors.NotSupportedException;
import org.spearce.jgit.util.FS;
+import org.spearce.jgit.util.RawParseUtils;
/**
* A representation of the Git index.
@@ -178,8 +179,9 @@ public Entry add(File wd, File f) throws IOException {
* @param f
* the file whose path shall be removed.
* @return true if such a path was found (and thus removed)
+ * @throws IOException
*/
- public boolean remove(File wd, File f) {
+ public boolean remove(File wd, File f) throws IOException {
byte[] key = makeKey(wd, f);
return entries.remove(key) != null;
}
@@ -300,11 +302,11 @@ static boolean File_hasExecute() {
return FS.INSTANCE.supportsExecute();
}
- static byte[] makeKey(File wd, File f) {
+ static byte[] makeKey(File wd, File f) throws IOException {
if (!f.getPath().startsWith(wd.getPath()))
throw new Error("Path is not in working dir");
String relName = Repository.stripWorkDir(wd, f);
- return relName.getBytes();
+ return Constants.encode(relName);
}
Boolean filemode;
@@ -376,7 +378,7 @@ Entry(TreeEntry f, int stage)
size = -1;
}
sha1 = f.getId();
- name = f.getFullName().getBytes("UTF-8");
+ name = Constants.encode(f.getFullName());
flags = (short) ((stage << 12) | name.length); // TODO: fix flags
}
@@ -580,7 +582,7 @@ private File getFile(File wd) {
}
public String toString() {
- return new String(name) + "/SHA-1(" + sha1.name() + ")/M:"
+ return getName() + "/SHA-1(" + sha1.name() + ")/M:"
+ new Date(ctime / 1000000L) + "/C:"
+ new Date(mtime / 1000000L) + "/d" + dev + "/i" + ino
+ "/m" + Integer.toString(mode, 8) + "/u" + uid + "/g"
@@ -591,7 +593,7 @@ public String toString() {
* @return path name for this entry
*/
public String getName() {
- return new String(name);
+ return RawParseUtils.decode(name);
}
/**
@@ -731,7 +733,7 @@ void readTree(String prefix, Tree t) throws IOException {
readTree(name, (Tree) te);
} else {
Entry e = new Entry(te, 0);
- entries.put(name.getBytes("UTF-8"), e);
+ entries.put(Constants.encode(name), e);
}
}
}
@@ -743,7 +745,7 @@ void readTree(String prefix, Tree t) throws IOException {
* @throws IOException
*/
public Entry addEntry(TreeEntry te) throws IOException {
- byte[] key = te.getFullName().getBytes("UTF-8");
+ byte[] key = Constants.encode(te.getFullName());
Entry e = new Entry(te, 0);
entries.put(key, e);
return e;
@@ -824,8 +826,7 @@ public ObjectId writeTree() throws IOException {
}
while (trees.size() < newName.length) {
if (!current.existsTree(newName[trees.size() - 1])) {
- current = new Tree(current, newName[trees.size() - 1]
- .getBytes());
+ current = new Tree(current, Constants.encode(newName[trees.size() - 1]));
current.getParent().addEntry(current);
trees.push(current);
} else {
@@ -835,7 +836,7 @@ public ObjectId writeTree() throws IOException {
}
}
FileTreeEntry ne = new FileTreeEntry(current, e.sha1,
- newName[newName.length - 1].getBytes(),
+ Constants.encode(newName[newName.length - 1]),
(e.mode & FileMode.EXECUTABLE_FILE.getBits()) == FileMode.EXECUTABLE_FILE.getBits());
current.addEntry(ne);
}
@@ -880,7 +881,7 @@ int longestCommonPath(String[] a, String[] b) {
* Small beware: Unaccounted for are unmerged entries. You may want
* to abort if members with stage != 0 are found if you are doing
* any updating operations. All stages will be found after one another
- * here later. Currently only one stage per name is returned.
+ * here later. Currently only one stage per name is returned.
*
* @return The index entries sorted
*/
@@ -896,7 +897,7 @@ int longestCommonPath(String[] a, String[] b) {
* @throws UnsupportedEncodingException
*/
public Entry getEntry(String path) throws UnsupportedEncodingException {
- return (Entry) entries.get(Repository.gitInternalSlash(path.getBytes("ISO-8859-1")));
+ return (Entry) entries.get(Repository.gitInternalSlash(Constants.encode(path)));
}
/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
index 25a9a71..0ecd04d 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/Tree.java
@@ -44,6 +44,7 @@
import org.spearce.jgit.errors.CorruptObjectException;
import org.spearce.jgit.errors.EntryExistsException;
import org.spearce.jgit.errors.MissingObjectException;
+import org.spearce.jgit.util.RawParseUtils;
/**
* A representation of a Git tree entry. A Tree is a directory in Git.
@@ -251,7 +252,7 @@ public void unload() {
* @throws IOException
*/
public FileTreeEntry addFile(final String name) throws IOException {
- return addFile(Repository.gitInternalSlash(name.getBytes(Constants.CHARACTER_ENCODING)), 0);
+ return addFile(Repository.gitInternalSlash(Constants.encode(name)), 0);
}
/**
@@ -281,8 +282,7 @@ public FileTreeEntry addFile(final byte[] s, final int offset)
final byte[] newName = substring(s, offset, slash);
if (p >= 0)
- throw new EntryExistsException(new String(newName,
- Constants.CHARACTER_ENCODING));
+ throw new EntryExistsException(RawParseUtils.decode(newName));
else if (slash < s.length) {
final Tree t = new Tree(this, newName);
insertEntry(p, t);
@@ -304,7 +304,7 @@ else if (slash < s.length) {
* @throws IOException
*/
public Tree addTree(final String name) throws IOException {
- return addTree(Repository.gitInternalSlash(name.getBytes(Constants.CHARACTER_ENCODING)), 0);
+ return addTree(Repository.gitInternalSlash(Constants.encode(name)), 0);
}
/**
@@ -332,8 +332,7 @@ public Tree addTree(final byte[] s, final int offset) throws IOException {
final byte[] newName = substring(s, offset, slash);
if (p >= 0)
- throw new EntryExistsException(new String(newName,
- Constants.CHARACTER_ENCODING));
+ throw new EntryExistsException(RawParseUtils.decode(newName));
final Tree t = new Tree(this, newName);
insertEntry(p, t);
@@ -355,8 +354,7 @@ public void addEntry(final TreeEntry e) throws IOException {
e.attachParent(this);
insertEntry(p, e);
} else {
- throw new EntryExistsException(new String(e.getNameUTF8(),
- Constants.CHARACTER_ENCODING));
+ throw new EntryExistsException(e.getName());
}
}
@@ -450,7 +448,7 @@ public boolean existsBlob(String path) throws IOException {
}
private TreeEntry findMember(final String s, byte slast) throws IOException {
- return findMember(Repository.gitInternalSlash(s.getBytes(Constants.CHARACTER_ENCODING)), slast, 0);
+ return findMember(Repository.gitInternalSlash(Constants.encode(s)), slast, 0);
}
private TreeEntry findMember(final byte[] s, final byte slast, final int offset)
diff --git a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
index 85dda1d..c95863c 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/lib/TreeEntry.java
@@ -39,9 +39,9 @@
package org.spearce.jgit.lib;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import org.spearce.jgit.lib.GitIndex.Entry;
+import org.spearce.jgit.util.RawParseUtils;
/**
* This class represents an entry in a tree, like a blob or another tree.
@@ -126,13 +126,9 @@ public Repository getRepository() {
* @return the name of this entry.
*/
public String getName() {
- try {
- return nameUTF8 != null ? new String(nameUTF8,
- Constants.CHARACTER_ENCODING) : null;
- } catch (UnsupportedEncodingException uee) {
- throw new RuntimeException("JVM doesn't support "
- + Constants.CHARACTER_ENCODING, uee);
- }
+ if (nameUTF8 != null)
+ return RawParseUtils.decode(nameUTF8);
+ return null;
}
/**
@@ -142,7 +138,7 @@ public String getName() {
* @throws IOException
*/
public void rename(final String n) throws IOException {
- rename(n.getBytes(Constants.CHARACTER_ENCODING));
+ rename(Constants.encode(n));
}
/**
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
index 6c0e339..4b96439 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
@@ -379,6 +379,38 @@ public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
}
/**
+ * Decode a buffer under UTF-8, if possible.
+ *
+ * If the byte stream cannot be decoded that way, the platform default is tried
+ * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+ *
+ * @param buffer
+ * buffer to pull raw bytes from.
+ * @return a string representation of the whole buffer, after decoding it
+ * as UTF-8 or, failing that, through the fallback character sets.
+ */
+ public static String decode(final byte[] buffer) {
+ return decode(Constants.CHARSET, buffer, 0, buffer.length);
+ }
+
+ /**
+ * Decode a buffer under the specified character set if possible.
+ *
+ * If the byte stream cannot be decoded that way, the platform default is tried
+ * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
+ *
+ * @param cs
+ * character set to use when decoding the buffer.
+ * @param buffer
+ * buffer to pull raw bytes from.
+ * @return a string representation of the whole buffer, after decoding it
+ * through the specified character set or, failing that, the fallbacks.
+ */
+ public static String decode(final Charset cs, final byte[] buffer) {
+ return decode(cs, buffer, 0, buffer.length);
+ }
+
+ /**
* Decode a region of the buffer under the specified character set if possible.
*
* If the byte stream cannot be decoded that way, the platform default is tried
--
1.6.0.2.308.gef4a
prev parent reply other threads:[~2008-10-19 18:26 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-10-19 13:29 [JGIT PATCH] Encode/decode index and tree entries using UTF-8 Robin Rosenberg
2008-10-19 17:14 ` Shawn O. Pearce
2008-10-19 18:24 ` Robin Rosenberg [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200810192024.24466.robin.rosenberg.lists@dewire.com \
--to=robin.rosenberg.lists@dewire.com \
--cc=git@vger.kernel.org \
--cc=spearce@spearce.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).