From: "Vladimir 'φ-coder/phcoder' Serbinenko" <phcoder@gmail.com>
To: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
Christoph Hellwig <hch@tuxera.com>,
Anton Salikhmetov <alexo@tuxera.com>
Subject: [PATCH 4/8] Support non-BMP characters on HFS+.
Date: Wed, 16 May 2012 01:06:58 +0200 [thread overview]
Message-ID: <4FB2E192.3080004@gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 5027 bytes --]
This one is a little bit tricky: HFS+ transforms UTF-16, but since it was designed without any attention to non-BMP characters, those characters are neither decomposed nor case-folded.
Signed-off-by: Vladimir Serbinenko <phcoder@gmail.com>
---
fs/hfsplus/unicode.c | 76 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 62 insertions(+), 14 deletions(-)
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 5b2c8de..161a23b 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -97,6 +97,11 @@ int hfsplus_strcmp(const struct hfsplus_unistr *s1,
#define Hangul_TCount 28
#define Hangul_NCount (Hangul_VCount * Hangul_TCount)
+#define SURROGATE_MASK 0xfffff800
+#define SURROGATE_PAIR 0x0000d800
+#define SURROGATE_LOW 0x00000400
+#define SURROGATE_BITS 0x000003ff
+
static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
@@ -189,6 +194,9 @@ int hfsplus_uni2asc(struct super_block *sb,
c0 = ':';
break;
}
+
+ if ((c0 & SURROGATE_MASK) == SURROGATE_PAIR)
+ goto same;
res = nls->uni2char(c0, op, len);
if (res < 0) {
if (res == -ENAMETOOLONG)
@@ -232,7 +240,19 @@ same:
cc = c0;
}
done:
- res = nls->uni2char(cc, op, len);
+ if ((cc & SURROGATE_MASK) == SURROGATE_PAIR
+ && !(cc & SURROGATE_LOW)
+ && ustrlen
+ && (be16_to_cpu(*ip) & SURROGATE_MASK) == SURROGATE_PAIR
+ && (be16_to_cpu(*ip) & SURROGATE_LOW)) {
+ unicode_t complete;
+ complete = (c0 & SURROGATE_BITS) << 10;
+ complete |= (be16_to_cpu(*ip++) & SURROGATE_BITS);
+ complete += 0x10000;
+ ustrlen--;
+ res = nls->uni2char(complete, op, len);
+ } else
+ res = nls->uni2char(cc, op, len);
if (res < 0) {
if (res == -ENAMETOOLONG)
goto out;
@@ -256,7 +276,7 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
unicode_t *uc)
{
int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
- if (size <= 0 || *uc > 0xffff) {
+ if (size <= 0) {
*uc = '?';
size = 1;
}
@@ -272,10 +292,13 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
}
/* Decomposes a single unicode character. */
-static inline u16 *decompose_unichar(wchar_t uc, int *size)
+static inline u16 *decompose_unichar(unicode_t uc, int *size)
{
int off;
+ if (uc >= 0x10000)
+ return NULL;
+
off = hfsplus_decompose_table[(uc >> 12) & 0xf];
if (off == 0 || off == 0xffff)
return NULL;
@@ -316,8 +339,16 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
do {
ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
} while (--dsize > 0);
- } else
- ustr->unicode[outlen++] = cpu_to_be16(c);
+ } else {
+ int s;
+ s = unicode_to_utf16s(c, UTF16_BIG_ENDIAN,
+ ustr->unicode + outlen,
+ HFSPLUS_MAX_STRLEN - outlen);
+ if (s <= 0)
+ break;
+
+ outlen += s;
+ }
astr += size;
len -= size;
@@ -342,7 +373,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
int casefold, decompose, size, len;
unsigned long hash;
unicode_t c;
- u16 c2;
+ unicode_t c2;
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -369,9 +400,17 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
} while (--dsize > 0);
} else {
c2 = c;
- if (casefold)
+ if (casefold && c2 < 0x10000)
c2 = case_fold(c2);
- if (!casefold || c2)
+ if (c2 >= 0x10000) {
+ int i, s;
+ u16 tmp[2];
+ s = unicode_to_utf16s(c2,
+ UTF16_HOST_ENDIAN,
+ tmp, 2);
+ for (i = 0; i < s; i++)
+ hash = partial_name_hash(tmp[i], hash);
+ } else if (!casefold || c2)
hash = partial_name_hash(c2, hash);
}
}
@@ -395,6 +434,7 @@ int hfsplus_compare_dentry(const struct dentry *parent,
int dsize1, dsize2, len1, len2;
const u16 *dstr1, *dstr2;
const char *astr1, *astr2;
+ u16 buf1[2], buf2[2];
u16 c1, c2;
unicode_t c;
@@ -416,9 +456,13 @@ int hfsplus_compare_dentry(const struct dentry *parent,
if (decompose)
dstr1 = decompose_unichar(c, &dsize1);
if (!decompose || !dstr1) {
- c1 = c;
- dstr1 = &c1;
- dsize1 = 1;
+ int s;
+ s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN,
+ buf1, 2);
+ if (s <= 0)
+ s = 0;
+ dstr1 = buf1;
+ dsize1 = s;
}
}
@@ -430,9 +474,13 @@ int hfsplus_compare_dentry(const struct dentry *parent,
if (decompose)
dstr2 = decompose_unichar(c, &dsize2);
if (!decompose || !dstr2) {
- c2 = c;
- dstr2 = &c2;
- dsize2 = 1;
+ int s;
+ s = unicode_to_utf16s(c, UTF16_HOST_ENDIAN,
+ buf2, 2);
+ if (s <= 0)
+ s = 0;
+ dstr2 = buf2;
+ dsize2 = s;
}
}
--
1.7.10
--
Regards
Vladimir 'φ-coder/phcoder' Serbinenko
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 294 bytes --]
reply other threads:[~2012-05-15 23:06 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FB2E192.3080004@gmail.com \
--to=phcoder@gmail.com \
--cc=alexo@tuxera.com \
--cc=hch@tuxera.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.