Linux network filesystem support library
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Steve French <sfrench@samba.org>
Cc: David Howells <dhowells@redhat.com>,
	Paulo Alcantara <pc@manguebit.org>,
	Shyam Prasad N <sprasad@microsoft.com>,
	Tom Talpey <tom@talpey.com>, Stefan Metzmacher <metze@samba.org>,
	Mina Almasry <almasrymina@google.com>,
	linux-cifs@vger.kernel.org, linux-kernel@vger.kernel.org,
	netfs@lists.linux.dev, linux-fsdevel@vger.kernel.org
Subject: [RFC PATCH 04/36] cifs, nls: Provide unicode size determination func
Date: Tue, 19 May 2026 11:21:22 +0100	[thread overview]
Message-ID: <20260519102158.592165-5-dhowells@redhat.com> (raw)
In-Reply-To: <20260519102158.592165-1-dhowells@redhat.com>

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Paulo Alcantara <pc@manguebit.org>
cc: Shyam Prasad N <sprasad@microsoft.com>
cc: Tom Talpey <tom@talpey.com>
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
---
 fs/nls/nls_base.c            | 33 ++++++++++++++++++++++++++++++
 fs/smb/client/cifs_unicode.c | 39 ++++++++++++++++++++++++++++++++++++
 fs/smb/client/cifs_unicode.h |  2 ++
 include/linux/nls.h          |  1 +
 4 files changed, 75 insertions(+)

diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index a5c3a9f1b8dc..69bc266b4a49 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -174,6 +174,39 @@ int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian,
 }
 EXPORT_SYMBOL(utf8s_to_utf16s);
 
+/**
+ * utf8s_to_len_utf16s - Size in bytes of a UTF8 string converted to UTF16.
+ * @s: The source utf8 string; scanning stops early at a NUL byte
+ * @inlen: The maximum number of bytes of @s to examine
+ */
+ssize_t utf8s_to_len_utf16s(const u8 *s, int inlen)
+{
+	unicode_t u;
+	size_t outcount = 0;
+	int size;
+
+	while (inlen > 0 && *s) {
+		if (*s & 0x80) {
+			size = utf8_to_utf32(s, inlen, &u);
+			if (size < 0)
+				return -EINVAL;
+			s += size;
+			inlen -= size;
+			/* Values >= PLANE_SIZE are counted as two units */
+			if (u >= PLANE_SIZE)
+				outcount += 2;
+			else
+				outcount++;
+		} else {
+			s++;
+			outcount++;
+			inlen--;
+		}
+	}
+	return outcount * sizeof(wchar_t);
+}
+EXPORT_SYMBOL(utf8s_to_len_utf16s);
+
 static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
 {
 	switch (endian) {
diff --git a/fs/smb/client/cifs_unicode.c b/fs/smb/client/cifs_unicode.c
index 4a8a591f4bca..f8fac73dcc1e 100644
--- a/fs/smb/client/cifs_unicode.c
+++ b/fs/smb/client/cifs_unicode.c
@@ -276,6 +276,45 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len,
 	return i;
 }
 
+/*
+ * Work out the size in bytes of up to @len bytes of @from once converted to
+ * UTF16 using @codepage.  This does not include a NUL terminator.
+ */
+size_t cifs_size_strtoUTF16(const char *from, int len,
+			    const struct nls_table *codepage)
+{
+	wchar_t wchar_to; /* needed to quiet sparse */
+	ssize_t out_len = 0;
+	int charlen;
+
+	/* special case for utf8 to handle non-plane0 chars */
+	if (strcmp(codepage->charset, "utf8") == 0) {
+		out_len = utf8s_to_len_utf16s(from, len);
+		if (out_len >= 0)
+			goto success;
+		/*
+		 * On failure (currently only possible if the source contains
+		 * invalidly encoded characters), fall back to UCS encoding and
+		 * let the loop below reset out_len to discard the error code.
+		 */
+	}
+
+	for (out_len = 0; len && *from; len -= charlen) {
+		charlen = codepage->char2uni(from, len, &wchar_to);
+		if (charlen < 1) {
+			cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n",
+				 *from, charlen);
+			/* Counted as if replaced with a question mark */
+			charlen = 1;
+		}
+		from += charlen;
+		out_len += 2;
+	}
+
+success:
+	return out_len;
+}
+
 /*
  * cifs_utf16_bytes - how long will a string be after conversion?
  * @utf16 - pointer to input string
diff --git a/fs/smb/client/cifs_unicode.h b/fs/smb/client/cifs_unicode.h
index 3e9cd9acf0a9..f774e0dd7461 100644
--- a/fs/smb/client/cifs_unicode.h
+++ b/fs/smb/client/cifs_unicode.h
@@ -61,6 +61,8 @@ int cifs_utf16_bytes(const __le16 *from, int maxbytes,
 		     const struct nls_table *codepage);
 int cifs_strtoUTF16(__le16 *to, const char *from, int len,
 		    const struct nls_table *codepage);
+size_t cifs_size_strtoUTF16(const char *from, int len,
+			    const struct nls_table *codepage);
 char *cifs_strndup_from_utf16(const char *src, const int maxlen,
 			      const bool is_unicode,
 			      const struct nls_table *codepage);
diff --git a/include/linux/nls.h b/include/linux/nls.h
index e0bf8367b274..026da1d5ffaa 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -56,6 +56,7 @@ extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
 extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
 extern int utf8s_to_utf16s(const u8 *s, int len,
 		enum utf16_endian endian, wchar_t *pwcs, int maxlen);
+ssize_t utf8s_to_len_utf16s(const u8 *s, int inlen);
 extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
 		enum utf16_endian endian, u8 *s, int maxlen);
 


  parent reply	other threads:[~2026-05-19 10:22 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20260519102158.592165-1-dhowells@redhat.com>
2026-05-19 10:21 ` [RFC PATCH 01/36] net: Perform special handling for a splice from a bvecq David Howells
2026-05-19 10:21 ` [RFC PATCH 02/36] netfs: Add a facility to splice TCP receive buffers into " David Howells
2026-05-19 10:21 ` [RFC PATCH 03/36] netfs: Add some TCP receive queue helpers David Howells
2026-05-19 10:21 ` David Howells [this message]
2026-05-19 10:21 ` [RFC PATCH 05/36] cifs: Introduce an ALIGN8() macro David Howells
2026-05-19 10:21 ` [RFC PATCH 06/36] cifs: Rename mid_q_entry to smb_message David Howells
2026-05-19 10:21 ` [RFC PATCH 07/36] cifs: Add "Has dynamic part" flag form SMB2/3 StructureSize LSB David Howells
2026-05-19 10:21 ` [RFC PATCH 09/36] cifs: Institute message managing struct David Howells
2026-05-19 10:21 ` [RFC PATCH 10/36] cifs: Split crypt_message() into encrypt and decrypt variants David Howells
2026-05-19 10:21 ` [RFC PATCH 11/36] cifs: Add new AEAD alloc and setup routines that draw from an iterator David Howells
2026-05-19 10:21 ` [RFC PATCH 12/36] cifs: [WIP] Rewrite base Rx to put data off the socket into a bvecq David Howells
2026-05-19 10:21 ` [RFC PATCH 13/36] cifs: Remove validate_t2() David Howells
2026-05-19 10:21 ` [RFC PATCH 14/36] cifs: Remove cifs_io_subrequest::got_bytes David Howells
2026-05-19 10:21 ` [RFC PATCH 15/36] cifs: Pass smb_message to cifs_verify_signature() David Howells
2026-05-19 10:21 ` [RFC PATCH 16/36] cifs: Rewrite base TCP transmission David Howells
2026-05-19 10:36   ` Stefan Metzmacher
2026-05-19 10:21 ` [RFC PATCH 17/36] cifs: Don't use corking David Howells
2026-05-19 10:21 ` [RFC PATCH 20/36] cifs: Pass smb_message structs down into the transport layer David Howells
2026-05-19 10:21 ` [RFC PATCH 21/36] cifs: Add a tracepoint to trace the smb_message refcount David Howells
2026-05-19 10:21 ` [RFC PATCH 22/36] cifs: Trace smb1/2_copy_to_prepped_buffers() David Howells
2026-05-19 10:21 ` [RFC PATCH 23/36] cifs: Clean up mid->callback_data and kill off mid->creator David Howells
2026-05-19 10:21 ` [RFC PATCH 24/36] cifs: Add netmem allocation functions David Howells
2026-05-19 10:21 ` [RFC PATCH 25/36] cifs: Add more pieces to smb_message David Howells
2026-05-19 10:21 ` [RFC PATCH 26/36] cifs: Convert SMB2 Negotiate Protocol request David Howells
2026-05-19 10:21 ` [RFC PATCH 27/36] cifs: Convert SMB2 Session Setup request David Howells
2026-05-19 10:21 ` [RFC PATCH 28/36] cifs: Convert SMB2 Logoff request David Howells
2026-05-19 10:21 ` [RFC PATCH 29/36] cifs: Convert SMB2 Tree Connect request David Howells
2026-05-19 10:21 ` [RFC PATCH 30/36] cifs: Convert SMB2 Tree Disconnect request David Howells
2026-05-19 10:21 ` [RFC PATCH 31/36] cifs: Convert SMB2 Read request David Howells
2026-05-19 10:21 ` [RFC PATCH 32/36] cifs: Convert SMB2 Write request David Howells
2026-05-19 10:21 ` [RFC PATCH 33/36] cifs: [WIP] Don't copy new-style smb_messages to a set of pages David Howells
2026-05-19 10:21 ` [RFC PATCH 34/36] cifs: [WIP] Rearrange Create request subfuncs David Howells
2026-05-19 10:21 ` [RFC PATCH 35/36] cifs: [WIP] Convert SMB2 Posix Mkdir request David Howells
2026-05-19 10:21 ` [RFC PATCH 36/36] cifs: [WIP] Convert SMB2 Open request David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260519102158.592165-5-dhowells@redhat.com \
    --to=dhowells@redhat.com \
    --cc=almasrymina@google.com \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=metze@samba.org \
    --cc=netfs@lists.linux.dev \
    --cc=pc@manguebit.org \
    --cc=sfrench@samba.org \
    --cc=sprasad@microsoft.com \
    --cc=tom@talpey.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox