All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Vladimir 'φ-coder/phcoder' Serbinenko" <phcoder@gmail.com>
To: Jan Kara <jack@suse.cz>,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [PATCH 1/4] Add UTF-16 convenience functions
Date: Fri, 01 Jun 2012 03:10:16 +0200	[thread overview]
Message-ID: <4FC81678.5040106@gmail.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 3810 bytes --]

These helpers are used to add non-BMP character support to UDF and some
other filesystems.
This series depends on my previously sent patch
[PATCH 1/8] Support full unicode in uni2char and char2uni
which I can resend if needed.

Signed-off-by: Vladimir Serbinenko <phcoder@gmail.com>
---
 fs/nls/nls_base.c   |   63 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nls/nls_utf8.c   |    2 +-
 include/linux/nls.h |    6 +++++
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 4f6d1ae..0c1ad5b 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -171,6 +171,32 @@ int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian,
 }
 EXPORT_SYMBOL(utf8s_to_utf16s);
 
+/*
+ * Encode code point @u as UTF-16 into @pwcs, which has room for @maxout
+ * units.  Returns the number of units written (1, or 2 for a surrogate
+ * pair), or -1 without writing anything if @maxout is too small.
+ */
+int unicode_to_utf16s(unicode_t u, enum utf16_endian endian,
+		      wchar_t *pwcs, int maxout)
+{
+	u16 *op = pwcs;
+
+	if (u < PLANE_SIZE) {
+		if (maxout < 1)	/* even a single unit needs one free slot */
+			return -1;
+		put_utf16(op, u, endian);
+		return 1;
+	}
+	if (maxout < 2)
+		return -1;
+	u -= PLANE_SIZE;	/* surrogates carry the offset past PLANE_SIZE */
+	put_utf16(op++, SURROGATE_PAIR | ((u >> 10) & SURROGATE_BITS), endian);
+	put_utf16(op, SURROGATE_PAIR | SURROGATE_LOW | (u & SURROGATE_BITS),
+		  endian);
+	return 2;
+}
+EXPORT_SYMBOL(unicode_to_utf16s);
+
 static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
 {
 	switch (endian) {
@@ -232,6 +258,43 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
 }
 EXPORT_SYMBOL(utf16s_to_utf8s);
 
+/* Decode one code point from at most @inlen UTF-16 units, skipping unpaired
+ * surrogates.  Returns the units consumed with the result stored in @uni, or
+ * 0 with @uni untouched at a zero unit or when no full code point remains. */
+int utf16s_to_unicode(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
+		      unicode_t *uni)
+{
+	const wchar_t *start = pwcs;
+	unsigned long hi, lo;
+
+	while (inlen > 0) {
+		hi = get_utf16(*pwcs, endian);
+		if (hi == 0)
+			return 0;
+		pwcs++;
+		inlen--;
+		if ((hi & SURROGATE_MASK) != SURROGATE_PAIR) {
+			*uni = hi;	/* ordinary single-unit character */
+			return pwcs - start;
+		}
+		if (hi & SURROGATE_LOW)
+			continue;	/* low half with no high half: drop */
+		if (inlen <= 0)
+			return 0;	/* high half is the last unit */
+		lo = get_utf16(*pwcs, endian);
+		if ((lo & SURROGATE_MASK) != SURROGATE_PAIR ||
+		    !(lo & SURROGATE_LOW))
+			continue;	/* drop high half, rescan this unit */
+		pwcs++;
+		inlen--;
+		*uni = PLANE_SIZE + ((hi & SURROGATE_BITS) << 10)
+			+ (lo & SURROGATE_BITS);
+		return pwcs - start;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(utf16s_to_unicode);
+
 int register_nls(struct nls_table * nls)
 {
 	struct nls_table ** tmp = &tables;
diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c
index eb6392e..a3b3de0 100644
--- a/fs/nls/nls_utf8.c
+++ b/fs/nls/nls_utf8.c
@@ -37,7 +37,7 @@ static int char2uni(const unsigned char *rawstring, int boundlen,
 		*uni = 0x003f;	/* ? */
 		return -EINVAL;
 	}
-	*uni = (wchar_t) u;
+	*uni = u;
 	return n;
 }
 
diff --git a/include/linux/nls.h b/include/linux/nls.h
index c0292dd..7de1765 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -50,12 +50,18 @@ extern struct nls_table *load_nls(char *);
 extern void unload_nls(struct nls_table *);
 extern struct nls_table *load_nls_default(void);
 
+#define MAX_UTF16_PER_UNICODE 2
+
 extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
 extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
 extern int utf8s_to_utf16s(const u8 *s, int len,
 		enum utf16_endian endian, wchar_t *pwcs, int maxlen);
 extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
 		enum utf16_endian endian, u8 *s, int maxlen);
+int unicode_to_utf16s(unicode_t u, enum utf16_endian endian,
+		      wchar_t *pwcs, int maxout);
+int utf16s_to_unicode(const wchar_t *pwcs, int inlen, enum utf16_endian endian,
+		      unicode_t *uni);
 
 static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c)
 {
-- 
1.7.10

-- 
Regards
Vladimir 'φ-coder/phcoder' Serbinenko


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 294 bytes --]

             reply	other threads:[~2012-06-01  1:10 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-01  1:10 Vladimir 'φ-coder/phcoder' Serbinenko [this message]
2012-06-04 15:29 ` [PATCH 1/4] Add UTF-16 convenience functions Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4FC81678.5040106@gmail.com \
    --to=phcoder@gmail.com \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.