public inbox for linux-bluetooth@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8
@ 2025-07-09  9:40 Frédéric Danis
  2025-07-09 11:03 ` [BlueZ] " bluez.test.bot
  2025-07-09 13:11 ` [PATCH BlueZ] " Luiz Augusto von Dentz
  0 siblings, 2 replies; 4+ messages in thread
From: Frédéric Danis @ 2025-07-09  9:40 UTC (permalink / raw)
  To: linux-bluetooth

Move duplicate code to static validateutf8() and fix boundary access
on multi-byte character check.
---
 src/shared/util.c | 56 +++++++++++++++--------------------------------
 1 file changed, 18 insertions(+), 38 deletions(-)

diff --git a/src/shared/util.c b/src/shared/util.c
index 4780f26b6..36c06188f 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -1909,7 +1909,7 @@ char *strstrip(char *str)
 	return str;
 }
 
-bool strisutf8(const char *str, size_t len)
+static bool validateutf8(const char *str, size_t len, size_t *invalid_index)
 {
 	size_t i = 0;
 
@@ -1928,17 +1928,23 @@ bool strisutf8(const char *str, size_t len)
 			size = 3;
 		else if ((c & 0xF8) == 0xF0)
 			size = 4;
-		else
+		else {
 			/* Invalid UTF-8 sequence */
+			if (invalid_index)
+				*invalid_index = i;
 			return false;
+		}
 
 		/* Check the following bytes to ensure they have the correct
 		 * format.
 		 */
 		for (size_t j = 1; j < size; ++j) {
-			if (i + j > len || (str[i + j] & 0xC0) != 0x80)
+			if (i + j >= len || (str[i + j] & 0xC0) != 0x80) {
 				/* Invalid UTF-8 sequence */
+				if (invalid_index)
+					*invalid_index = i;
 				return false;
+			}
 		}
 
 		/* Move to the next character */
@@ -1948,6 +1954,11 @@ bool strisutf8(const char *str, size_t len)
 	return true;
 }
 
+bool strisutf8(const char *str, size_t len)
+{
+	return validateutf8(str, len, NULL);
+}
+
 bool argsisutf8(int argc, char *argv[])
 {
 	for (int i = 0; i < argc; i++) {
@@ -1962,42 +1973,11 @@ bool argsisutf8(int argc, char *argv[])
 
 char *strtoutf8(char *str, size_t len)
 {
-	size_t i = 0;
-
-	while (i < len) {
-		unsigned char c = str[i];
-		size_t size = 0;
-
-		/* Check the first byte to determine the number of bytes in the
-		 * UTF-8 character.
-		 */
-		if ((c & 0x80) == 0x00)
-			size = 1;
-		else if ((c & 0xE0) == 0xC0)
-			size = 2;
-		else if ((c & 0xF0) == 0xE0)
-			size = 3;
-		else if ((c & 0xF8) == 0xF0)
-			size = 4;
-		else
-			/* Invalid UTF-8 sequence */
-			goto done;
-
-		/* Check the following bytes to ensure they have the correct
-		 * format.
-		 */
-		for (size_t j = 1; j < size; ++j) {
-			if (i + j > len || (str[i + j] & 0xC0) != 0x80)
-				/* Invalid UTF-8 sequence */
-				goto done;
-		}
+	size_t invalid_index = 0;
 
-		/* Move to the next character */
-		i += size;
-	}
+	if (!validateutf8(str, len, &invalid_index))
+		/* Truncate to the longest valid UTF-8 string */
+		memset(str + invalid_index, 0, len - invalid_index);
 
-done:
-	/* Truncate to the longest valid UTF-8 string */
-	memset(str + i, 0, len - i);
 	return str;
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8
  2025-07-09  9:40 [PATCH BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8 Frédéric Danis
@ 2025-07-09 11:03 ` bluez.test.bot
  2025-07-09 13:11 ` [PATCH BlueZ] " Luiz Augusto von Dentz
  1 sibling, 0 replies; 4+ messages in thread
From: bluez.test.bot @ 2025-07-09 11:03 UTC (permalink / raw)
  To: linux-bluetooth, frederic.danis

[-- Attachment #1: Type: text/plain, Size: 1261 bytes --]

This is an automated email; please do not reply to it!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
These are the CI test results for your patch series:
PW Link:https://patchwork.kernel.org/project/bluetooth/list/?series=980429

---Test result---

Test Summary:
CheckPatch                    PENDING   0.36 seconds
GitLint                       PENDING   0.55 seconds
BuildEll                      PASS      20.07 seconds
BluezMake                     PASS      2575.82 seconds
MakeCheck                     PASS      20.22 seconds
MakeDistcheck                 PASS      182.18 seconds
CheckValgrind                 PASS      233.93 seconds
CheckSmatch                   PASS      302.45 seconds
bluezmakeextell               PASS      126.73 seconds
IncrementalBuild              PENDING   0.26 seconds
ScanBuild                     PASS      899.39 seconds

Details
##############################
Test: CheckPatch - PENDING
Desc: Run checkpatch.pl script
Output:

##############################
Test: GitLint - PENDING
Desc: Run gitlint
Output:

##############################
Test: IncrementalBuild - PENDING
Desc: Incremental build with the patches in the series
Output:



---
Regards,
Linux Bluetooth


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8
  2025-07-09  9:40 [PATCH BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8 Frédéric Danis
  2025-07-09 11:03 ` [BlueZ] " bluez.test.bot
@ 2025-07-09 13:11 ` Luiz Augusto von Dentz
  2025-07-09 13:30   ` Frédéric Danis
  1 sibling, 1 reply; 4+ messages in thread
From: Luiz Augusto von Dentz @ 2025-07-09 13:11 UTC (permalink / raw)
  To: Frédéric Danis; +Cc: linux-bluetooth

Hi Frédéric,

On Wed, Jul 9, 2025 at 5:46 AM Frédéric Danis
<frederic.danis@collabora.com> wrote:
>
> Move duplicate code to static validateutf8() and fix boundary access
> on multi-byte character check.
> ---
>  src/shared/util.c | 56 +++++++++++++++--------------------------------
>  1 file changed, 18 insertions(+), 38 deletions(-)
>
> diff --git a/src/shared/util.c b/src/shared/util.c
> index 4780f26b6..36c06188f 100644
> --- a/src/shared/util.c
> +++ b/src/shared/util.c
> @@ -1909,7 +1909,7 @@ char *strstrip(char *str)
>         return str;
>  }
>
> -bool strisutf8(const char *str, size_t len)
> +static bool validateutf8(const char *str, size_t len, size_t *invalid_index)
>  {
>         size_t i = 0;
>
> @@ -1928,17 +1928,23 @@ bool strisutf8(const char *str, size_t len)
>                         size = 3;
>                 else if ((c & 0xF8) == 0xF0)
>                         size = 4;
> -               else
> +               else {
>                         /* Invalid UTF-8 sequence */
> +                       if (invalid_index)
> +                               *invalid_index = i;
>                         return false;
> +               }
>
>                 /* Check the following bytes to ensure they have the correct
>                  * format.
>                  */
>                 for (size_t j = 1; j < size; ++j) {
> -                       if (i + j > len || (str[i + j] & 0xC0) != 0x80)
> +                       if (i + j >= len || (str[i + j] & 0xC0) != 0x80) {
>                                 /* Invalid UTF-8 sequence */
> +                               if (invalid_index)
> +                                       *invalid_index = i;
>                                 return false;
> +                       }
>                 }
>
>                 /* Move to the next character */
> @@ -1948,6 +1954,11 @@ bool strisutf8(const char *str, size_t len)
>         return true;
>  }
>
> +bool strisutf8(const char *str, size_t len)
> +{
> +       return validateutf8(str, len, NULL);
> +}
> +
>  bool argsisutf8(int argc, char *argv[])
>  {
>         for (int i = 0; i < argc; i++) {
> @@ -1962,42 +1973,11 @@ bool argsisutf8(int argc, char *argv[])
>
>  char *strtoutf8(char *str, size_t len)
>  {
> -       size_t i = 0;
> -
> -       while (i < len) {
> -               unsigned char c = str[i];
> -               size_t size = 0;
> -
> -               /* Check the first byte to determine the number of bytes in the
> -                * UTF-8 character.
> -                */
> -               if ((c & 0x80) == 0x00)
> -                       size = 1;
> -               else if ((c & 0xE0) == 0xC0)
> -                       size = 2;
> -               else if ((c & 0xF0) == 0xE0)
> -                       size = 3;
> -               else if ((c & 0xF8) == 0xF0)
> -                       size = 4;
> -               else
> -                       /* Invalid UTF-8 sequence */
> -                       goto done;
> -
> -               /* Check the following bytes to ensure they have the correct
> -                * format.
> -                */
> -               for (size_t j = 1; j < size; ++j) {
> -                       if (i + j > len || (str[i + j] & 0xC0) != 0x80)
> -                               /* Invalid UTF-8 sequence */
> -                               goto done;
> -               }
> +       size_t invalid_index = 0;
>
> -               /* Move to the next character */
> -               i += size;
> -       }
> +       if (!validateutf8(str, len, &invalid_index))
> +               /* Truncate to the longest valid UTF-8 string */
> +               memset(str + invalid_index, 0, len - invalid_index);
>
> -done:
> -       /* Truncate to the longest valid UTF-8 string */
> -       memset(str + i, 0, len - i);
>         return str;
>  }
> --
> 2.43.0
>

I did something similar yesterday:

https://patchwork.kernel.org/project/bluetooth/patch/20250708174628.2949030-1-luiz.dentz@gmail.com/

Let me know if you have any comments.


-- 
Luiz Augusto von Dentz

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8
  2025-07-09 13:11 ` [PATCH BlueZ] " Luiz Augusto von Dentz
@ 2025-07-09 13:30   ` Frédéric Danis
  0 siblings, 0 replies; 4+ messages in thread
From: Frédéric Danis @ 2025-07-09 13:30 UTC (permalink / raw)
  To: Luiz Augusto von Dentz; +Cc: linux-bluetooth

Hi Luiz,

On 09/07/2025 15:11, Luiz Augusto von Dentz wrote:
> Hi Frédéric,
>
> On Wed, Jul 9, 2025 at 5:46 AM Frédéric Danis
> <frederic.danis@collabora.com> wrote:
>> Move duplicate code to static validateutf8() and fix boundary access
>> on multi-byte character check.
>> ---
>>   src/shared/util.c | 56 +++++++++++++++--------------------------------
>>   1 file changed, 18 insertions(+), 38 deletions(-)
>>
>> diff --git a/src/shared/util.c b/src/shared/util.c
>> index 4780f26b6..36c06188f 100644
>> --- a/src/shared/util.c
>> +++ b/src/shared/util.c
>> @@ -1909,7 +1909,7 @@ char *strstrip(char *str)
>>          return str;
>>   }
>>
>> -bool strisutf8(const char *str, size_t len)
>> +static bool validateutf8(const char *str, size_t len, size_t *invalid_index)
>>   {
>>          size_t i = 0;
>>
>> @@ -1928,17 +1928,23 @@ bool strisutf8(const char *str, size_t len)
>>                          size = 3;
>>                  else if ((c & 0xF8) == 0xF0)
>>                          size = 4;
>> -               else
>> +               else {
>>                          /* Invalid UTF-8 sequence */
>> +                       if (invalid_index)
>> +                               *invalid_index = i;
>>                          return false;
>> +               }
>>
>>                  /* Check the following bytes to ensure they have the correct
>>                   * format.
>>                   */
>>                  for (size_t j = 1; j < size; ++j) {
>> -                       if (i + j > len || (str[i + j] & 0xC0) != 0x80)
>> +                       if (i + j >= len || (str[i + j] & 0xC0) != 0x80) {
>>                                  /* Invalid UTF-8 sequence */
>> +                               if (invalid_index)
>> +                                       *invalid_index = i;
>>                                  return false;
>> +                       }
>>                  }
>>
>>                  /* Move to the next character */
>> @@ -1948,6 +1954,11 @@ bool strisutf8(const char *str, size_t len)
>>          return true;
>>   }
>>
>> +bool strisutf8(const char *str, size_t len)
>> +{
>> +       return validateutf8(str, len, NULL);
>> +}
>> +
>>   bool argsisutf8(int argc, char *argv[])
>>   {
>>          for (int i = 0; i < argc; i++) {
>> @@ -1962,42 +1973,11 @@ bool argsisutf8(int argc, char *argv[])
>>
>>   char *strtoutf8(char *str, size_t len)
>>   {
>> -       size_t i = 0;
>> -
>> -       while (i < len) {
>> -               unsigned char c = str[i];
>> -               size_t size = 0;
>> -
>> -               /* Check the first byte to determine the number of bytes in the
>> -                * UTF-8 character.
>> -                */
>> -               if ((c & 0x80) == 0x00)
>> -                       size = 1;
>> -               else if ((c & 0xE0) == 0xC0)
>> -                       size = 2;
>> -               else if ((c & 0xF0) == 0xE0)
>> -                       size = 3;
>> -               else if ((c & 0xF8) == 0xF0)
>> -                       size = 4;
>> -               else
>> -                       /* Invalid UTF-8 sequence */
>> -                       goto done;
>> -
>> -               /* Check the following bytes to ensure they have the correct
>> -                * format.
>> -                */
>> -               for (size_t j = 1; j < size; ++j) {
>> -                       if (i + j > len || (str[i + j] & 0xC0) != 0x80)
>> -                               /* Invalid UTF-8 sequence */
>> -                               goto done;
>> -               }
>> +       size_t invalid_index = 0;
>>
>> -               /* Move to the next character */
>> -               i += size;
>> -       }
>> +       if (!validateutf8(str, len, &invalid_index))
>> +               /* Truncate to the longest valid UTF-8 string */
>> +               memset(str + invalid_index, 0, len - invalid_index);
>>
>> -done:
>> -       /* Truncate to the longest valid UTF-8 string */
>> -       memset(str + i, 0, len - i);
>>          return str;
>>   }
>> --
>> 2.43.0
>>
> I did something similar yesterday:
>
> https://patchwork.kernel.org/project/bluetooth/patch/20250708174628.2949030-1-luiz.dentz@gmail.com/
>
> Let me know if you have any comments.

Sorry, I missed it; you can discard mine.

-- 
Frédéric Danis
Senior Software Engineer

Collabora Ltd.
Platinum Building, St John's Innovation Park, Cambridge CB4 0DS, United Kingdom
Registered in England & Wales, no. 5513718


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-07-09 13:30 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-07-09  9:40 [PATCH BlueZ] shared/util: Refactor code from strisutf8 and strtoutf8 Frédéric Danis
2025-07-09 11:03 ` [BlueZ] " bluez.test.bot
2025-07-09 13:11 ` [PATCH BlueZ] " Luiz Augusto von Dentz
2025-07-09 13:30   ` Frédéric Danis

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox