From: "Pali Rohár" <pali@kernel.org>
To: linux-fsdevel@vger.kernel.org,
linux-ntfs-dev@lists.sourceforge.net, linux-cifs@vger.kernel.org,
jfs-discussion@lists.sourceforge.net,
linux-kernel@vger.kernel.org,
Alexander Viro <viro@zeniv.linux.org.uk>, Jan Kara <jack@suse.cz>,
"Theodore Y . Ts'o" <tytso@mit.edu>,
Anton Altaparmakov <anton@tuxera.com>,
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>,
Luis de Bethencourt <luisbg@kernel.org>,
Salah Triki <salah.triki@gmail.com>,
Steve French <sfrench@samba.org>, Paulo Alcantara <pc@cjr.nz>,
Ronnie Sahlberg <lsahlber@redhat.com>,
Shyam Prasad N <sprasad@microsoft.com>,
Tom Talpey <tom@talpey.com>, Dave Kleikamp <shaggy@kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Pavel Machek <pavel@ucw.cz>,
Christoph Hellwig <hch@infradead.org>,
Kari Argillander <kari.argillander@gmail.com>,
Viacheslav Dubeyko <slava@dubeyko.com>
Subject: [RFC PATCH v2 11/18] hfsplus: Do not use broken utf8 NLS table for iocharset=utf8 mount option
Date: Mon, 26 Dec 2022 15:21:43 +0100 [thread overview]
Message-ID: <20221226142150.13324-12-pali@kernel.org> (raw)
In-Reply-To: <20221226142150.13324-1-pali@kernel.org>
NLS table for utf8 is broken and cannot be fixed.
So instead of the broken utf8 nls functions char2uni() and uni2char(), use
the functions utf8_to_utf32() and utf32_to_utf8(), which implement correct
encoding and decoding between Unicode code points and UTF-8 sequences.
Note that this fs driver does not support the full Unicode range;
specifically, UTF-16 surrogate pairs are unsupported. This patch does not
change this limitation, and support for UTF-16 surrogate pairs stays
unimplemented.
When iocharset=utf8 is used, set sbi->nls to NULL and use that to
distinguish whether the NLS table or the native UTF-8 functions should
be used.
Signed-off-by: Pali Rohár <pali@kernel.org>
---
fs/hfsplus/dir.c | 7 +++++--
fs/hfsplus/options.c | 32 ++++++++++++++++++--------------
fs/hfsplus/super.c | 7 +------
fs/hfsplus/unicode.c | 31 ++++++++++++++++++++++++++++---
fs/hfsplus/xattr.c | 20 +++++++++++++-------
fs/hfsplus/xattr_security.c | 6 ++++--
6 files changed, 69 insertions(+), 34 deletions(-)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 84714bbccc12..b19cb6c34dd2 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -144,7 +144,9 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
if (err)
return err;
- strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_MAX_STRLEN + 1, GFP_KERNEL);
+ len = (HFSPLUS_SB(sb)->nls ? NLS_MAX_CHARSET_SIZE : 4) *
+ HFSPLUS_MAX_STRLEN + 1;
+ strbuf = kmalloc(len, GFP_KERNEL);
if (!strbuf) {
err = -ENOMEM;
goto out;
@@ -203,7 +205,8 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
fd.entrylength);
type = be16_to_cpu(entry.type);
- len = NLS_MAX_CHARSET_SIZE * HFSPLUS_MAX_STRLEN;
+ len = (HFSPLUS_SB(sb)->nls ? NLS_MAX_CHARSET_SIZE : 4) *
+ HFSPLUS_MAX_STRLEN;
err = hfsplus_uni2asc(sb, &fd.key->cat.name, strbuf, &len);
if (err)
goto out;
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index d3dc0d4ba77f..ede7776d1da9 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -104,6 +104,9 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
char *p;
substring_t args[MAX_OPT_ARGS];
int tmp, token;
+ bool have_iocharset;
+
+ have_iocharset = false;
if (!input)
goto done;
@@ -175,20 +178,24 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
pr_warn("option nls= is deprecated, use iocharset=\n");
fallthrough;
case opt_iocharset:
- if (sbi->nls) {
+ if (have_iocharset) {
pr_err("unable to change nls mapping\n");
return 0;
}
p = match_strdup(&args[0]);
- if (p)
- sbi->nls = load_nls(p);
- if (!sbi->nls) {
- pr_err("unable to load nls mapping \"%s\"\n",
- p);
- kfree(p);
+ if (!p)
return 0;
+ if (strcmp(p, "utf8") != 0) {
+ sbi->nls = load_nls(p);
+ if (!sbi->nls) {
+ pr_err("unable to load nls mapping "
+ "\"%s\"\n", p);
+ kfree(p);
+ return 0;
+ }
}
kfree(p);
+ have_iocharset = true;
break;
case opt_decompose:
clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags);
@@ -211,13 +218,10 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
}
done:
- if (!sbi->nls) {
- /* try utf8 first, as this is the old default behaviour */
- sbi->nls = load_nls("utf8");
- if (!sbi->nls)
- sbi->nls = load_nls_default();
- if (!sbi->nls)
- return 0;
+ if (!have_iocharset) {
+ /* use utf8, as this is the old default behaviour */
+ pr_debug("using native UTF-8 without nls\n");
+ /* no sbi->nls means that native UTF-8 code is used */
}
return 1;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 122ed89ebf9f..8a66a77ad3e1 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -403,11 +403,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
/* temporarily use utf8 to correctly find the hidden dir below */
nls = sbi->nls;
- sbi->nls = load_nls("utf8");
- if (!sbi->nls) {
- pr_err("unable to load nls for utf8\n");
- goto out_unload_nls;
- }
+ sbi->nls = NULL;
/* Grab the volume header */
if (hfsplus_read_wrapper(sb)) {
@@ -585,7 +581,6 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
}
}
- unload_nls(sbi->nls);
sbi->nls = nls;
return 0;
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 73342c925a4b..dc9be40d049f 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -190,7 +190,12 @@ int hfsplus_uni2asc(struct super_block *sb,
c0 = ':';
break;
}
- res = nls->uni2char(c0, op, len);
+ if (nls)
+ res = nls->uni2char(c0, op, len);
+ else if (len > 0)
+ res = utf32_to_utf8(c0, op, len);
+ else
+ res = -ENAMETOOLONG;
if (res < 0) {
if (res == -ENAMETOOLONG)
goto out;
@@ -233,7 +238,12 @@ int hfsplus_uni2asc(struct super_block *sb,
cc = c0;
}
done:
- res = nls->uni2char(cc, op, len);
+ if (nls)
+ res = nls->uni2char(cc, op, len);
+ else if (len > 0)
+ res = utf32_to_utf8(cc, op, len);
+ else
+ res = -ENAMETOOLONG;
if (res < 0) {
if (res == -ENAMETOOLONG)
goto out;
@@ -256,7 +266,22 @@ int hfsplus_uni2asc(struct super_block *sb,
static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
wchar_t *uc)
{
- int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
+ struct nls_table *nls = HFSPLUS_SB(sb)->nls;
+ unicode_t u;
+ int size;
+
+ if (nls)
+ size = nls->char2uni(astr, len, uc);
+ else {
+ size = utf8_to_utf32(astr, len, &u);
+ if (size >= 0) {
+ /* TODO: Add support for UTF-16 surrogate pairs */
+ if (u <= MAX_WCHAR_T)
+ *uc = u;
+ else
+ size = -EINVAL;
+ }
+ }
if (size <= 0) {
*uc = '?';
size = 1;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index 49891b12c415..607f46b3d0f3 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -422,11 +422,13 @@ int hfsplus_setxattr(struct inode *inode, const char *name,
const void *value, size_t size, int flags,
const char *prefix, size_t prefixlen)
{
+ int xattr_name_len;
char *xattr_name;
int res;
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
- GFP_KERNEL);
+ xattr_name_len = (HFSPLUS_SB(inode->i_sb)->nls ? NLS_MAX_CHARSET_SIZE : 4) *
+ HFSPLUS_ATTR_MAX_STRLEN + 1;
+ xattr_name = kmalloc(xattr_name_len, GFP_KERNEL);
if (!xattr_name)
return -ENOMEM;
strcpy(xattr_name, prefix);
@@ -578,9 +580,11 @@ ssize_t hfsplus_getxattr(struct inode *inode, const char *name,
{
int res;
char *xattr_name;
+ int xattr_name_len;
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
- GFP_KERNEL);
+ xattr_name_len = (HFSPLUS_SB(inode->i_sb)->nls ? NLS_MAX_CHARSET_SIZE : 4) *
+ HFSPLUS_ATTR_MAX_STRLEN + 1;
+ xattr_name = kmalloc(xattr_name_len, GFP_KERNEL);
if (!xattr_name)
return -ENOMEM;
@@ -699,8 +703,9 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
return err;
}
- strbuf = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN +
- XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
+ xattr_name_len = (HFSPLUS_SB(inode->i_sb)->nls ? NLS_MAX_CHARSET_SIZE : 4) *
+ HFSPLUS_ATTR_MAX_STRLEN + XATTR_MAC_OSX_PREFIX_LEN + 1;
+ strbuf = kmalloc(xattr_name_len, GFP_KERNEL);
if (!strbuf) {
res = -ENOMEM;
goto out;
@@ -732,7 +737,8 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
if (be32_to_cpu(attr_key.cnid) != inode->i_ino)
goto end_listxattr;
- xattr_name_len = NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN;
+ xattr_name_len = (HFSPLUS_SB(inode->i_sb)->nls ? NLS_MAX_CHARSET_SIZE : 4) *
+ HFSPLUS_ATTR_MAX_STRLEN;
if (hfsplus_uni2asc(inode->i_sb,
(const struct hfsplus_unistr *)&fd.key->attr.key_name,
strbuf, &xattr_name_len)) {
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index c1c7a16cbf21..b4b45c796ef4 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -38,11 +38,13 @@ static int hfsplus_initxattrs(struct inode *inode,
void *fs_info)
{
const struct xattr *xattr;
+ int xattr_name_len;
char *xattr_name;
int err = 0;
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
- GFP_KERNEL);
+ xattr_name_len = (HFSPLUS_SB(inode->i_sb)->nls ? NLS_MAX_CHARSET_SIZE : 4) *
+ HFSPLUS_ATTR_MAX_STRLEN + 1;
+ xattr_name = kmalloc(xattr_name_len, GFP_KERNEL);
if (!xattr_name)
return -ENOMEM;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
--
2.20.1
next prev parent reply other threads:[~2022-12-26 14:23 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-12-26 14:21 [RFC PATCH v2 00/18] fs: Remove usage of broken nls_utf8 and drop it Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 01/18] fat: Fix iocharset=utf8 mount option Pali Rohár
2023-01-10 9:17 ` OGAWA Hirofumi
2023-02-04 10:57 ` Pali Rohár
2023-02-08 10:10 ` OGAWA Hirofumi
2022-12-26 14:21 ` [RFC PATCH v2 02/18] hfsplus: Add iocharset= mount option as alias for nls= Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 03/18] ntfs: Undeprecate iocharset= mount option Pali Rohár
2023-01-01 19:02 ` Kari Argillander
2023-01-01 19:06 ` Pali Rohár
2023-01-01 23:02 ` Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 04/18] ntfs: Fix error processing when load_nls() fails Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 05/18] befs: Fix printing iocharset= mount option Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 06/18] befs: Rename enum value Opt_charset to Opt_iocharset to match " Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 07/18] befs: Fix error processing when load_nls() fails Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 08/18] befs: Allow to use native UTF-8 mode Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 09/18] hfs: Explicitly set hsb->nls_disk when hsb->nls_io is set Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 10/18] hfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option Pali Rohár
2022-12-26 14:21 ` Pali Rohár [this message]
2022-12-26 14:21 ` [RFC PATCH v2 12/18] jfs: Remove custom iso8859-1 implementation Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 13/18] jfs: Fix buffer overflow in jfs_strfromUCS_le() function Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 14/18] jfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 15/18] ntfs: " Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 16/18] cifs: " Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 17/18] cifs: Remove usage of load_nls_default() calls Pali Rohár
2022-12-26 14:21 ` [RFC PATCH v2 18/18] nls: Drop broken nls_utf8 module Pali Rohár
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221226142150.13324-12-pali@kernel.org \
--to=pali@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=anton@tuxera.com \
--cc=hch@infradead.org \
--cc=hirofumi@mail.parknet.co.jp \
--cc=jack@suse.cz \
--cc=jfs-discussion@lists.sourceforge.net \
--cc=kari.argillander@gmail.com \
--cc=linux-cifs@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-ntfs-dev@lists.sourceforge.net \
--cc=lsahlber@redhat.com \
--cc=luisbg@kernel.org \
--cc=pavel@ucw.cz \
--cc=pc@cjr.nz \
--cc=salah.triki@gmail.com \
--cc=sfrench@samba.org \
--cc=shaggy@kernel.org \
--cc=slava@dubeyko.com \
--cc=sprasad@microsoft.com \
--cc=tom@talpey.com \
--cc=tytso@mit.edu \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.