All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	patches@lists.linux.dev,
	Marios Makassikis <mmakassikis@freebox.fr>,
	Namjae Jeon <linkinjeon@kernel.org>,
	Steve French <stfrench@microsoft.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.6 05/49] ksmbd: add support for surrogate pair conversion
Date: Wed,  3 Jan 2024 17:55:25 +0100	[thread overview]
Message-ID: <20240103164835.759886589@linuxfoundation.org> (raw)
In-Reply-To: <20240103164834.970234661@linuxfoundation.org>

6.6-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Namjae Jeon <linkinjeon@kernel.org>

[ Upstream commit 0c180317c654a494fe429adbf7bc9b0793caf9e2 ]

ksmbd is missing supporting to convert filename included surrogate pair
characters. It triggers a "file or folder does not exist" error in
Windows client.

[Steps to Reproduce for bug]
1. Create surrogate pair file
 touch $(echo -e '\xf0\x9d\x9f\xa3')
 touch $(echo -e '\xf0\x9d\x9f\xa4')

2. Try to open these files in ksmbd share through Windows client.

This patch update unicode functions not to consider about surrogate pair
(and IVS).

Reviewed-by: Marios Makassikis <mmakassikis@freebox.fr>
Tested-by: Marios Makassikis <mmakassikis@freebox.fr>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/smb/server/unicode.c | 187 +++++++++++++++++++++++++++++-----------
 1 file changed, 138 insertions(+), 49 deletions(-)

diff --git a/fs/smb/server/unicode.c b/fs/smb/server/unicode.c
index 393dd4a7432b6..43ed29ee44ead 100644
--- a/fs/smb/server/unicode.c
+++ b/fs/smb/server/unicode.c
@@ -13,46 +13,10 @@
 #include "unicode.h"
 #include "smb_common.h"
 
-/*
- * smb_utf16_bytes() - how long will a string be after conversion?
- * @from:	pointer to input string
- * @maxbytes:	don't go past this many bytes of input string
- * @codepage:	destination codepage
- *
- * Walk a utf16le string and return the number of bytes that the string will
- * be after being converted to the given charset, not including any null
- * termination required. Don't walk past maxbytes in the source buffer.
- *
- * Return:	string length after conversion
- */
-static int smb_utf16_bytes(const __le16 *from, int maxbytes,
-			   const struct nls_table *codepage)
-{
-	int i;
-	int charlen, outlen = 0;
-	int maxwords = maxbytes / 2;
-	char tmp[NLS_MAX_CHARSET_SIZE];
-	__u16 ftmp;
-
-	for (i = 0; i < maxwords; i++) {
-		ftmp = get_unaligned_le16(&from[i]);
-		if (ftmp == 0)
-			break;
-
-		charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
-		if (charlen > 0)
-			outlen += charlen;
-		else
-			outlen++;
-	}
-
-	return outlen;
-}
-
 /*
  * cifs_mapchar() - convert a host-endian char to proper char in codepage
  * @target:	where converted character should be copied
- * @src_char:	2 byte host-endian source character
+ * @from:	host-endian source string
  * @cp:		codepage to which character should be converted
  * @mapchar:	should character be mapped according to mapchars mount option?
  *
@@ -63,10 +27,13 @@ static int smb_utf16_bytes(const __le16 *from, int maxbytes,
  * Return:	string length after conversion
  */
 static int
-cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
+cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
 	     bool mapchar)
 {
 	int len = 1;
+	__u16 src_char;
+
+	src_char = *from;
 
 	if (!mapchar)
 		goto cp_convert;
@@ -104,12 +71,66 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
 
 cp_convert:
 	len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
-	if (len <= 0) {
-		*target = '?';
-		len = 1;
-	}
+	if (len <= 0)
+		goto surrogate_pair;
 
 	goto out;
+
+surrogate_pair:
+	/* convert SURROGATE_PAIR and IVS */
+	if (strcmp(cp->charset, "utf8"))
+		goto unknown;
+	len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
+	if (len <= 0)
+		goto unknown;
+	return len;
+
+unknown:
+	*target = '?';
+	len = 1;
+	goto out;
+}
+
+/*
+ * smb_utf16_bytes() - compute converted string length
+ * @from:	pointer to input string
+ * @maxbytes:	input string length
+ * @codepage:	destination codepage
+ *
+ * Walk a utf16le string and return the number of bytes that the string will
+ * be after being converted to the given charset, not including any null
+ * termination required. Don't walk past maxbytes in the source buffer.
+ *
+ * Return:	string length after conversion
+ */
+static int smb_utf16_bytes(const __le16 *from, int maxbytes,
+			   const struct nls_table *codepage)
+{
+	int i, j;
+	int charlen, outlen = 0;
+	int maxwords = maxbytes / 2;
+	char tmp[NLS_MAX_CHARSET_SIZE];
+	__u16 ftmp[3];
+
+	for (i = 0; i < maxwords; i++) {
+		ftmp[0] = get_unaligned_le16(&from[i]);
+		if (ftmp[0] == 0)
+			break;
+		for (j = 1; j <= 2; j++) {
+			if (i + j < maxwords)
+				ftmp[j] = get_unaligned_le16(&from[i + j]);
+			else
+				ftmp[j] = 0;
+		}
+
+		charlen = cifs_mapchar(tmp, ftmp, codepage, 0);
+		if (charlen > 0)
+			outlen += charlen;
+		else
+			outlen++;
+	}
+
+	return outlen;
 }
 
 /*
@@ -139,12 +160,12 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
 static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
 			  const struct nls_table *codepage, bool mapchar)
 {
-	int i, charlen, safelen;
+	int i, j, charlen, safelen;
 	int outlen = 0;
 	int nullsize = nls_nullsize(codepage);
 	int fromwords = fromlen / 2;
 	char tmp[NLS_MAX_CHARSET_SIZE];
-	__u16 ftmp;
+	__u16 ftmp[3];	/* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
 
 	/*
 	 * because the chars can be of varying widths, we need to take care
@@ -155,9 +176,15 @@ static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
 	safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
 
 	for (i = 0; i < fromwords; i++) {
-		ftmp = get_unaligned_le16(&from[i]);
-		if (ftmp == 0)
+		ftmp[0] = get_unaligned_le16(&from[i]);
+		if (ftmp[0] == 0)
 			break;
+		for (j = 1; j <= 2; j++) {
+			if (i + j < fromwords)
+				ftmp[j] = get_unaligned_le16(&from[i + j]);
+			else
+				ftmp[j] = 0;
+		}
 
 		/*
 		 * check to see if converting this character might make the
@@ -172,6 +199,19 @@ static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
 		/* put converted char into 'to' buffer */
 		charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
 		outlen += charlen;
+
+		/*
+		 * charlen (=bytes of UTF-8 for 1 character)
+		 * 4bytes UTF-8(surrogate pair) is charlen=4
+		 * (4bytes UTF-16 code)
+		 * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
+		 * (2 UTF-8 pairs divided to 2 UTF-16 pairs)
+		 */
+		if (charlen == 4)
+			i++;
+		else if (charlen >= 5)
+			/* 5-6bytes UTF-8 */
+			i += 2;
 	}
 
 	/* properly null-terminate string */
@@ -306,6 +346,9 @@ int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
 	char src_char;
 	__le16 dst_char;
 	wchar_t tmp;
+	wchar_t wchar_to[6];	/* UTF-16 */
+	int ret;
+	unicode_t u;
 
 	if (!mapchars)
 		return smb_strtoUTF16(target, source, srclen, cp);
@@ -348,11 +391,57 @@ int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
 			 * if no match, use question mark, which at least in
 			 * some cases serves as wild card
 			 */
-			if (charlen < 1) {
-				dst_char = cpu_to_le16(0x003f);
-				charlen = 1;
+			if (charlen > 0)
+				goto ctoUTF16;
+
+			/* convert SURROGATE_PAIR */
+			if (strcmp(cp->charset, "utf8"))
+				goto unknown;
+			if (*(source + i) & 0x80) {
+				charlen = utf8_to_utf32(source + i, 6, &u);
+				if (charlen < 0)
+					goto unknown;
+			} else
+				goto unknown;
+			ret  = utf8s_to_utf16s(source + i, charlen,
+					UTF16_LITTLE_ENDIAN,
+					wchar_to, 6);
+			if (ret < 0)
+				goto unknown;
+
+			i += charlen;
+			dst_char = cpu_to_le16(*wchar_to);
+			if (charlen <= 3)
+				/* 1-3bytes UTF-8 to 2bytes UTF-16 */
+				put_unaligned(dst_char, &target[j]);
+			else if (charlen == 4) {
+				/*
+				 * 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
+				 * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
+				 * (charlen=3+4 or 4+4)
+				 */
+				put_unaligned(dst_char, &target[j]);
+				dst_char = cpu_to_le16(*(wchar_to + 1));
+				j++;
+				put_unaligned(dst_char, &target[j]);
+			} else if (charlen >= 5) {
+				/* 5-6bytes UTF-8 to 6bytes UTF-16 */
+				put_unaligned(dst_char, &target[j]);
+				dst_char = cpu_to_le16(*(wchar_to + 1));
+				j++;
+				put_unaligned(dst_char, &target[j]);
+				dst_char = cpu_to_le16(*(wchar_to + 2));
+				j++;
+				put_unaligned(dst_char, &target[j]);
 			}
+			continue;
+
+unknown:
+			dst_char = cpu_to_le16(0x003f);
+			charlen = 1;
 		}
+
+ctoUTF16:
 		/*
 		 * character may take more than one byte in the source string,
 		 * but will take exactly two bytes in the target string
-- 
2.43.0




  parent reply	other threads:[~2024-01-03 17:13 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-03 16:55 [PATCH 6.6 00/49] 6.6.10-rc1 review Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 01/49] ksmbd: Remove unused field in ksmbd_user struct Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 02/49] ksmbd: reorganize ksmbd_iov_pin_rsp() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 03/49] ksmbd: fix kernel-doc comment of ksmbd_vfs_setxattr() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 04/49] ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev() Greg Kroah-Hartman
2024-01-03 16:55 ` Greg Kroah-Hartman [this message]
2024-01-03 16:55 ` [PATCH 6.6 06/49] ksmbd: no need to wait for binded connection termination at logoff Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 07/49] ksmbd: fix kernel-doc comment of ksmbd_vfs_kern_path_locked() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 08/49] ksmbd: prevent memory leak on error return Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 09/49] ksmbd: separately allocate ci per dentry Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 10/49] ksmbd: move oplock handling after unlock parent dir Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 11/49] ksmbd: release interim response after sending status pending response Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 12/49] ksmbd: move setting SMB2_FLAGS_ASYNC_COMMAND and AsyncId Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 13/49] ksmbd: dont update ->op_state as OPLOCK_STATE_NONE on error Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 14/49] ksmbd: set epoch in create context v2 lease Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 15/49] ksmbd: set v2 lease capability Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 16/49] ksmbd: downgrade RWH lease caching state to RH for directory Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 17/49] ksmbd: send v2 lease break notification " Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 18/49] ksmbd: lazy v2 lease break on smb2_write() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 19/49] ksmbd: avoid duplicate opinfo_put() call on error of smb21_lease_break_ack() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 20/49] fs: new accessor methods for atime and mtime Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 21/49] client: convert to new timestamp accessors Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 22/49] fs: cifs: Fix atime update check Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 23/49] virtio_ring: fix syncs DMA memory with different direction Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 24/49] kexec: fix KEXEC_FILE dependencies Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 25/49] kexec: select CRYPTO from KEXEC_FILE instead of depending on it Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 26/49] linux/export: Fix alignment for 64-bit ksymtab entries Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 27/49] linux/export: Ensure natural alignment of kcrctab array Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 28/49] mptcp: refactor sndbuf auto-tuning Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 29/49] mptcp: fix possible NULL pointer dereference on close Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 30/49] mptcp: fix inconsistent state on fastopen race Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 31/49] block: renumber QUEUE_FLAG_HW_WC Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 32/49] platform/x86/intel/pmc: Add suspend callback Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 33/49] platform/x86/intel/pmc: Allow reenabling LTRs Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 34/49] platform/x86/intel/pmc: Move GBE LTR ignore to suspend callback Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 35/49] ksmbd: fix slab-out-of-bounds in smb_strndup_from_utf16() Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 36/49] platform/x86: p2sb: Allow p2sb_bar() calls during PCI device probe Greg Kroah-Hartman
2024-01-04  9:01   ` Shinichiro Kawasaki
2024-01-03 16:55 ` [PATCH 6.6 37/49] maple_tree: do not preallocate nodes for slot stores Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 38/49] selftests: secretmem: floor the memory size to the multiple of page_size Greg Kroah-Hartman
2024-01-03 16:55 ` [PATCH 6.6 39/49] mm/filemap: avoid buffered read/write race to read inconsistent data Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 40/49] mm: migrate high-order folios in swap cache correctly Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 41/49] mm/memory-failure: cast index to loff_t before shifting it Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 42/49] mm/memory-failure: check the mapcount of the precise page Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 43/49] Revert "nvme-fc: fix race between error recovery and creating association" Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 44/49] ring-buffer: Fix wake ups when buffer_percent is set to 100 Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 45/49] ftrace: Fix modification of direct_function hash while in use Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 46/49] tracing: Fix blocked reader of snapshot buffer Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 47/49] wifi: cfg80211: fix CQM for non-range use Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 48/49] wifi: nl80211: fix deadlock in nl80211_set_cqm_rssi (6.6.x) Greg Kroah-Hartman
2024-01-03 16:56 ` [PATCH 6.6 49/49] netfilter: nf_tables: skip set commit for deleted/destroyed sets Greg Kroah-Hartman
2024-01-03 17:44 ` [PATCH 6.6 00/49] 6.6.10-rc1 review Nam Cao
2024-01-03 18:57 ` SeongJae Park
2024-01-03 22:04 ` Florian Fainelli
2024-01-03 23:35 ` Kelsey Steele
2024-01-04  0:18 ` Shuah Khan
2024-01-04  2:24 ` Takeshi Ogasawara
2024-01-04  4:10 ` Daniel Díaz
2024-01-04  7:15   ` Daniel Díaz
2024-01-04  7:58     ` Greg Kroah-Hartman
2024-01-04  8:21       ` Johannes Berg
2024-01-04 12:39       ` Naresh Kamboju
2024-01-04 12:58         ` Greg Kroah-Hartman
2024-01-04  5:20 ` Bagas Sanjaya
2024-01-04  7:55 ` Luna Jernberg
2024-01-04  7:57   ` Greg Kroah-Hartman
2024-01-04 10:26 ` Ron Economos
2024-01-04 11:53 ` Harshit Mogalapalli
2024-01-04 16:52 ` Jon Hunter
2024-01-04 17:12 ` Allen
2024-01-05  1:04 ` Guenter Roeck
2024-01-05  2:43 ` Namjae Jeon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240103164835.759886589@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=linkinjeon@kernel.org \
    --cc=mmakassikis@freebox.fr \
    --cc=patches@lists.linux.dev \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=stfrench@microsoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.