All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Pali Rohár" <pali@kernel.org>
To: linux-fsdevel@vger.kernel.org,
	linux-ntfs-dev@lists.sourceforge.net, linux-cifs@vger.kernel.org,
	jfs-discussion@lists.sourceforge.net,
	linux-kernel@vger.kernel.org,
	"Alexander Viro" <viro@zeniv.linux.org.uk>,
	"Jan Kara" <jack@suse.cz>,
	"OGAWA Hirofumi" <hirofumi@mail.parknet.co.jp>,
	"Theodore Y . Ts'o" <tytso@mit.edu>,
	"Luis de Bethencourt" <luisbg@kernel.org>,
	"Salah Triki" <salah.triki@gmail.com>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Dave Kleikamp" <shaggy@kernel.org>,
	"Anton Altaparmakov" <anton@tuxera.com>,
	"Pavel Machek" <pavel@ucw.cz>, "Marek Behún" <marek.behun@nic.cz>,
	"Christoph Hellwig" <hch@infradead.org>
Subject: [RFC PATCH 01/20] fat: Fix iocharset=utf8 mount option
Date: Sun,  8 Aug 2021 18:24:34 +0200	[thread overview]
Message-ID: <20210808162453.1653-2-pali@kernel.org> (raw)
In-Reply-To: <20210808162453.1653-1-pali@kernel.org>

Currently the iocharset=utf8 mount option is broken and an error is printed
to dmesg when it is used. To use UTF-8 as the iocharset, it is required to
use the utf8=1 mount option.

Fix the iocharset=utf8 mount option to be equivalent to the utf8=1 mount
option and stop printing the error to dmesg.

FAT by definition is case-insensitive, but the current Linux implementation
is case-sensitive for non-ASCII characters when UTF-8 is used. This patch
does not change this UTF-8 behavior; it only adds more comments about it in
the fat_utf8_strnicmp() function.

After this patch iocharset=utf8 starts working, so there is no need for a
separate FAT_DEFAULT_UTF8 config option: setting FAT_DEFAULT_IOCHARSET to
utf8 now works as well. Therefore remove the redundant FAT_DEFAULT_UTF8
config option.

Signed-off-by: Pali Rohár <pali@kernel.org>
---
 fs/fat/Kconfig      | 15 ---------------
 fs/fat/dir.c        | 17 +++++++----------
 fs/fat/fat.h        | 22 ++++++++++++++++++++++
 fs/fat/inode.c      | 28 +++++++++++-----------------
 fs/fat/namei_vfat.c | 26 +++++++++++++++++++-------
 5 files changed, 59 insertions(+), 49 deletions(-)

diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index 66532a71e8fd..a31594137d5e 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig
@@ -100,18 +100,3 @@ config FAT_DEFAULT_IOCHARSET
 
 	  Enable any character sets you need in File Systems/Native Language
 	  Support.
-
-config FAT_DEFAULT_UTF8
-	bool "Enable FAT UTF-8 option by default"
-	depends on VFAT_FS
-	default n
-	help
-	  Set this if you would like to have "utf8" mount option set
-	  by default when mounting FAT filesystems.
-
-	  Even if you say Y here can always disable UTF-8 for
-	  particular mount by adding "utf8=0" to mount options.
-
-	  Say Y if you use UTF-8 encoding for file names, N otherwise.
-
-	  See <file:Documentation/filesystems/vfat.rst> for more information.
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index c4a274285858..49fe8dc6e5f0 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -33,11 +33,6 @@
 #define FAT_MAX_UNI_CHARS	((MSDOS_SLOTS - 1) * 13 + 1)
 #define FAT_MAX_UNI_SIZE	(FAT_MAX_UNI_CHARS * sizeof(wchar_t))
 
-static inline unsigned char fat_tolower(unsigned char c)
-{
-	return ((c >= 'A') && (c <= 'Z')) ? c+32 : c;
-}
-
 static inline loff_t fat_make_i_pos(struct super_block *sb,
 				    struct buffer_head *bh,
 				    struct msdos_dir_entry *de)
@@ -258,10 +253,12 @@ static inline int fat_name_match(struct msdos_sb_info *sbi,
 	if (a_len != b_len)
 		return 0;
 
-	if (sbi->options.name_check != 's')
-		return !nls_strnicmp(sbi->nls_io, a, b, a_len);
-	else
+	if (sbi->options.name_check == 's')
 		return !memcmp(a, b, a_len);
+	else if (sbi->options.utf8)
+		return !fat_utf8_strnicmp(a, b, a_len);
+	else
+		return !nls_strnicmp(sbi->nls_io, a, b, a_len);
 }
 
 enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
@@ -384,7 +381,7 @@ static int fat_parse_short(struct super_block *sb,
 					de->lcase & CASE_LOWER_BASE);
 		if (chl <= 1) {
 			if (!isvfat)
-				ptname[i] = nocase ? c : fat_tolower(c);
+				ptname[i] = nocase ? c : fat_ascii_to_lower(c);
 			i++;
 			if (c != ' ') {
 				name_len = i;
@@ -421,7 +418,7 @@ static int fat_parse_short(struct super_block *sb,
 		if (chl <= 1) {
 			k++;
 			if (!isvfat)
-				ptname[i] = nocase ? c : fat_tolower(c);
+				ptname[i] = nocase ? c : fat_ascii_to_lower(c);
 			i++;
 			if (c != ' ') {
 				name_len = i;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 02d4d4234956..0cd15fb3b042 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -310,6 +310,28 @@ static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
 #endif
 }
 
+static inline unsigned char fat_ascii_to_lower(unsigned char c)
+{
+	return ((c >= 'A') && (c <= 'Z')) ? c+32 : c;
+}
+
+static inline int fat_utf8_strnicmp(const unsigned char *a,
+				    const unsigned char *b,
+				    int len)
+{
+	int i;
+
+	/*
+	 * FIXME: UTF-8 doesn't provide FAT semantics
+	 * Case-insensitive support is only for 7-bit ASCII characters
+	 */
+	for (i = 0; i < len; i++) {
+		if (fat_ascii_to_lower(a[i]) != fat_ascii_to_lower(b[i]))
+			return 1;
+	}
+	return 0;
+}
+
 /* fat/cache.c */
 extern void fat_cache_inval_inode(struct inode *inode);
 extern int fat_get_cluster(struct inode *inode, int cluster,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index de0c9b013a85..f8c8a739f8f0 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -957,7 +957,9 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
 		/* strip "cp" prefix from displayed option */
 		seq_printf(m, ",codepage=%s", &sbi->nls_disk->charset[2]);
 	if (isvfat) {
-		if (sbi->nls_io)
+		if (opts->utf8)
+			seq_printf(m, ",iocharset=utf8");
+		else if (sbi->nls_io)
 			seq_printf(m, ",iocharset=%s", sbi->nls_io->charset);
 
 		switch (opts->shortname) {
@@ -994,8 +996,6 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
 		if (opts->nocase)
 			seq_puts(m, ",nocase");
 	} else {
-		if (opts->utf8)
-			seq_puts(m, ",utf8");
 		if (opts->unicode_xlate)
 			seq_puts(m, ",uni_xlate");
 		if (!opts->numtail)
@@ -1157,8 +1157,6 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
 	opts->errors = FAT_ERRORS_RO;
 	*debug = 0;
 
-	opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat;
-
 	if (!options)
 		goto out;
 
@@ -1319,10 +1317,14 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
 					| VFAT_SFN_CREATE_WIN95;
 			break;
 		case Opt_utf8_no:		/* 0 or no or false */
-			opts->utf8 = 0;
+			fat_reset_iocharset(opts);
 			break;
 		case Opt_utf8_yes:		/* empty or 1 or yes or true */
-			opts->utf8 = 1;
+			fat_reset_iocharset(opts);
+			iocharset = kstrdup("utf8", GFP_KERNEL);
+			if (!iocharset)
+				return -ENOMEM;
+			opts->iocharset = iocharset;
 			break;
 		case Opt_uni_xl_no:		/* 0 or no or false */
 			opts->unicode_xlate = 0;
@@ -1360,18 +1362,11 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
 	}
 
 out:
-	/* UTF-8 doesn't provide FAT semantics */
-	if (!strcmp(opts->iocharset, "utf8")) {
-		fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
-		       " for FAT filesystems, filesystem will be "
-		       "case sensitive!");
-	}
+	opts->utf8 = !strcmp(opts->iocharset, "utf8") && is_vfat;
 
 	/* If user doesn't specify allow_utime, it's initialized from dmask. */
 	if (opts->allow_utime == (unsigned short)-1)
 		opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
-	if (opts->unicode_xlate)
-		opts->utf8 = 0;
 	if (opts->nfs == FAT_NFS_NOSTALE_RO) {
 		sb->s_flags |= SB_RDONLY;
 		sb->s_export_op = &fat_export_ops_nostale;
@@ -1832,8 +1827,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 		goto out_fail;
 	}
 
-	/* FIXME: utf8 is using iocharset for upper/lower conversion */
-	if (sbi->options.isvfat) {
+	if (sbi->options.isvfat && !sbi->options.utf8) {
 		sbi->nls_io = load_nls(sbi->options.iocharset);
 		if (!sbi->nls_io) {
 			fat_msg(sb, KERN_ERR, "IO charset %s not found",
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 5369d82e0bfb..efb3cb9ea8a8 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -134,6 +134,7 @@ static int vfat_hash(const struct dentry *dentry, struct qstr *qstr)
 static int vfat_hashi(const struct dentry *dentry, struct qstr *qstr)
 {
 	struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
+	int utf8 = MSDOS_SB(dentry->d_sb)->options.utf8;
 	const unsigned char *name;
 	unsigned int len;
 	unsigned long hash;
@@ -142,8 +143,17 @@ static int vfat_hashi(const struct dentry *dentry, struct qstr *qstr)
 	len = vfat_striptail_len(qstr);
 
 	hash = init_name_hash(dentry);
-	while (len--)
-		hash = partial_name_hash(nls_tolower(t, *name++), hash);
+	if (utf8) {
+		/*
+		 * FIXME: UTF-8 doesn't provide FAT semantics
+		 * Case-insensitive support is only for 7-bit ASCII characters
+		 */
+		while (len--)
+			hash = partial_name_hash(fat_ascii_to_lower(*name++), hash);
+	} else {
+		while (len--)
+			hash = partial_name_hash(nls_tolower(t, *name++), hash);
+	}
 	qstr->hash = end_name_hash(hash);
 
 	return 0;
@@ -156,16 +166,18 @@ static int vfat_cmpi(const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
 	struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
+	int utf8 = MSDOS_SB(dentry->d_sb)->options.utf8;
 	unsigned int alen, blen;
 
 	/* A filename cannot end in '.' or we treat it like it has none */
 	alen = vfat_striptail_len(name);
 	blen = __vfat_striptail_len(len, str);
-	if (alen == blen) {
-		if (nls_strnicmp(t, name->name, str, alen) == 0)
-			return 0;
-	}
-	return 1;
+	if (alen != blen)
+		return 1;
+	else if (utf8)
+		return fat_utf8_strnicmp(name->name, str, alen);
+	else
+		return nls_strnicmp(t, name->name, str, alen);
 }
 
 /*
-- 
2.20.1


  reply	other threads:[~2021-08-08 16:25 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-08 16:24 [RFC PATCH 00/20] fs: Remove usage of broken nls_utf8 and drop it Pali Rohár
2021-08-08 16:24 ` Pali Rohár [this message]
2021-08-15  3:42   ` [RFC PATCH 01/20] fat: Fix iocharset=utf8 mount option OGAWA Hirofumi
2021-08-15  9:42     ` Pali Rohár
2021-08-15 11:23       ` OGAWA Hirofumi
2021-08-23  3:51   ` Kari Argillander
2021-08-08 16:24 ` [RFC PATCH 02/20] hfsplus: Add iocharset= mount option as alias for nls= Pali Rohár
2021-08-09 17:51   ` Viacheslav Dubeyko
2021-08-09 20:49   ` Kari Argillander
2021-08-09 21:25     ` Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 03/20] udf: Fix iocharset=utf8 mount option Pali Rohár
2021-08-12 14:17   ` Jan Kara
2021-08-12 15:51     ` Pali Rohár
2021-08-13 13:48       ` Jan Kara
2021-08-19  8:34         ` Pali Rohár
2021-08-19 10:41           ` Jan Kara
2021-08-08 16:24 ` [RFC PATCH 04/20] isofs: joliet: " Pali Rohár
2021-08-12 14:18   ` Jan Kara
2021-08-08 16:24 ` [RFC PATCH 05/20] ntfs: Undeprecate iocharset= " Pali Rohár
2021-08-09 20:52   ` Kari Argillander
2021-08-19  1:21   ` Kari Argillander
2021-08-19  8:12     ` Pali Rohár
2021-08-19 10:23       ` Kari Argillander
2021-08-19 22:04         ` Pali Rohár
2021-08-19 23:18           ` Kari Argillander
2021-08-08 16:24 ` [RFC PATCH 06/20] ntfs: Fix error processing when load_nls() fails Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 07/20] befs: Fix printing iocharset= mount option Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 08/20] befs: Rename enum value Opt_charset to Opt_iocharset to match " Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 09/20] befs: Fix error processing when load_nls() fails Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 10/20] befs: Allow to use native UTF-8 mode Pali Rohár
2021-08-08 19:20   ` kernel test robot
2021-08-08 16:24 ` [RFC PATCH 11/20] hfs: Explicitly set hsb->nls_disk when hsb->nls_io is set Pali Rohár
2021-08-09 17:31   ` Viacheslav Dubeyko
2021-08-09 17:37     ` Matthew Wilcox
2021-08-09 17:47       ` Pali Rohár
2021-08-09 20:43         ` Steve French
2021-08-09 18:00       ` Viacheslav Dubeyko
2021-08-08 16:24 ` [RFC PATCH 12/20] hfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option Pali Rohár
2021-08-09 17:49   ` Viacheslav Dubeyko
2022-09-25 12:06     ` Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 13/20] hfsplus: " Pali Rohár
2021-08-09 17:42   ` Viacheslav Dubeyko
2022-09-25 12:12     ` Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 14/20] jfs: Remove custom iso8859-1 implementation Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 15/20] jfs: Fix buffer overflow in jfs_strfromUCS_le() function Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 16/20] jfs: Do not use broken utf8 NLS table for iocharset=utf8 mount option Pali Rohár
2021-08-09 22:51   ` kernel test robot
2021-08-08 16:24 ` [RFC PATCH 17/20] ntfs: " Pali Rohár
2021-08-08 17:53   ` kernel test robot
2021-08-10  0:34   ` kernel test robot
2021-08-08 16:24 ` [RFC PATCH 18/20] cifs: " Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 19/20] cifs: Remove usage of load_nls_default() calls Pali Rohár
2021-08-08 16:24 ` [RFC PATCH 20/20] nls: Drop broken nls_utf8 module Pali Rohár
2021-09-03 21:26 ` [RFC PATCH 00/20] fs: Remove usage of broken nls_utf8 and drop it Kari Argillander
2021-09-03 21:37   ` Pali Rohár
2021-09-03 22:06     ` Kari Argillander

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210808162453.1653-2-pali@kernel.org \
    --to=pali@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=anton@tuxera.com \
    --cc=hch@infradead.org \
    --cc=hirofumi@mail.parknet.co.jp \
    --cc=jack@suse.cz \
    --cc=jfs-discussion@lists.sourceforge.net \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-ntfs-dev@lists.sourceforge.net \
    --cc=luisbg@kernel.org \
    --cc=marek.behun@nic.cz \
    --cc=pavel@ucw.cz \
    --cc=salah.triki@gmail.com \
    --cc=shaggy@kernel.org \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.