From: "Frédéric Danis" <frederic.danis@collabora.com>
To: linux-bluetooth@vger.kernel.org
Subject: [PATCH BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8
Date: Wed, 9 Jul 2025 11:40:55 +0200 [thread overview]
Message-ID: <20250709094055.516584-1-frederic.danis@collabora.com> (raw)
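Move the UTF-8 validation loop duplicated in strisutf8() and strtoutf8()
into a static validateutf8() helper, and fix the off-by-one bounds check
on the continuation bytes of a multi-byte character: "i + j > len" still
allowed str[len] to be read, so the check is now "i + j >= len".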
---
src/shared/util.c | 56 +++++++++++++++--------------------------------
1 file changed, 18 insertions(+), 38 deletions(-)
diff --git a/src/shared/util.c b/src/shared/util.c
index 4780f26b6..36c06188f 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -1909,7 +1909,7 @@ char *strstrip(char *str)
return str;
}
-bool strisutf8(const char *str, size_t len)
+static bool validateutf8(const char *str, size_t len, size_t *invalid_index)
{
size_t i = 0;
@@ -1928,17 +1928,23 @@ bool strisutf8(const char *str, size_t len)
size = 3;
else if ((c & 0xF8) == 0xF0)
size = 4;
- else
+ else {
/* Invalid UTF-8 sequence */
+ if (invalid_index)
+ *invalid_index = i;
return false;
+ }
/* Check the following bytes to ensure they have the correct
* format.
*/
for (size_t j = 1; j < size; ++j) {
- if (i + j > len || (str[i + j] & 0xC0) != 0x80)
+ if (i + j >= len || (str[i + j] & 0xC0) != 0x80) {
/* Invalid UTF-8 sequence */
+ if (invalid_index)
+ *invalid_index = i;
return false;
+ }
}
/* Move to the next character */
@@ -1948,6 +1954,11 @@ bool strisutf8(const char *str, size_t len)
return true;
}
+bool strisutf8(const char *str, size_t len)
+{
+ return validateutf8(str, len, NULL);
+}
+
bool argsisutf8(int argc, char *argv[])
{
for (int i = 0; i < argc; i++) {
@@ -1962,42 +1973,11 @@ bool argsisutf8(int argc, char *argv[])
char *strtoutf8(char *str, size_t len)
{
- size_t i = 0;
-
- while (i < len) {
- unsigned char c = str[i];
- size_t size = 0;
-
- /* Check the first byte to determine the number of bytes in the
- * UTF-8 character.
- */
- if ((c & 0x80) == 0x00)
- size = 1;
- else if ((c & 0xE0) == 0xC0)
- size = 2;
- else if ((c & 0xF0) == 0xE0)
- size = 3;
- else if ((c & 0xF8) == 0xF0)
- size = 4;
- else
- /* Invalid UTF-8 sequence */
- goto done;
-
- /* Check the following bytes to ensure they have the correct
- * format.
- */
- for (size_t j = 1; j < size; ++j) {
- if (i + j > len || (str[i + j] & 0xC0) != 0x80)
- /* Invalid UTF-8 sequence */
- goto done;
- }
+ size_t invalid_index = 0;
- /* Move to the next character */
- i += size;
- }
+ if (!validateutf8(str, len, &invalid_index))
+ /* Truncate to the longest valid UTF-8 string */
+ memset(str + invalid_index, 0, len - invalid_index);
-done:
- /* Truncate to the longest valid UTF-8 string */
- memset(str + i, 0, len - i);
return str;
}
--
2.43.0
next reply other threads:[~2025-07-09 9:41 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-09 9:40 Frédéric Danis [this message]
2025-07-09 11:03 ` [BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8 bluez.test.bot
2025-07-09 13:11 ` [PATCH BlueZ] " Luiz Augusto von Dentz
2025-07-09 13:30 ` Frédéric Danis
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250709094055.516584-1-frederic.danis@collabora.com \
--to=frederic.danis@collabora.com \
--cc=linux-bluetooth@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.