All of lore.kernel.org
 help / color / mirror / Atom feed
From: "H. Peter Anvin" <hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org>
To: Matt Fleming <matt-HNK1S37rvNbeXh+fF434Mdi2O/JbrIOy@public.gmane.org>
Cc: Roy Franz <roy.franz-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>,
	Adam Borowski <kilobyte-b9QjgO8OEXPVItvQsEIGlw@public.gmane.org>,
	Linux Kernel Mailing List
	<linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
	linux-efi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	matt.fleming-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org,
	Leif Lindholm
	<leif.lindholm-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>,
	Mark Salter <msalter-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Subject: Re: [PATCH 09/17] Move unicode to ASCII conversion to shared function.
Date: Fri, 20 Sep 2013 10:00:12 -0500	[thread overview]
Message-ID: <523C62FC.8010907@zytor.com> (raw)
In-Reply-To: <20130920092713.GD4785-HNK1S37rvNbeXh+fF434Mdi2O/JbrIOy@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 634 bytes --]

On 09/20/2013 04:27 AM, Matt Fleming wrote:
> On Wed, 18 Sep, at 09:48:44PM, Roy Franz wrote:
>> Would it be acceptable to fix the naming/comments, and convert values
>> above 126 to '?'
>> in the current patchset, and address a more thorough fix in another patch set?
>> The ARM and ARM64 EFI stub patchsets that are mostly complete depend
>> on this one,
>> so getting this merged soon would be helpful.
> 
> Just fixing the function name and comments is enough for this patch
> series. Anything else should be separate.
> 

I just whipped up a patch to do proper UTF-16 to UTF-8 conversion.
Completely untested, of course.

	-hpa


[-- Attachment #2: 0001-efi-Handle-arbitrary-Unicode-characters.patch --]
[-- Type: text/x-patch, Size: 4945 bytes --]

>From 7d6cf630c1adbb9787a24c2994230373c2b20a8f Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Date: Fri, 20 Sep 2013 09:55:39 -0500
Subject: [PATCH] efi: Handle arbitrary Unicode characters

Instead of truncating UTF-16 assuming all characters is ASCII,
properly convert it to UTF-8.

Signed-off-by: H. Peter Anvin <hpa-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
---
 arch/x86/boot/compressed/eboot.c       |  3 +-
 drivers/firmware/efi/efi-stub-helper.c | 89 ++++++++++++++++++++++++++--------
 2 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index bf56206..aacbe50 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -488,8 +488,7 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
 	hdr->type_of_loader = 0x21;
 
 	/* Convert unicode cmdline to ascii */
-	cmdline_ptr = efi_convert_cmdline_to_ascii(sys_table, image,
-						   &options_size);
+	cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
 	if (!cmdline_ptr)
 		goto fail;
 	hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c
index c91e757..7d4c1a4 100644
--- a/drivers/firmware/efi/efi-stub-helper.c
+++ b/drivers/firmware/efi/efi-stub-helper.c
@@ -567,20 +567,72 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
 
 	return status;
 }
-/* Convert the unicode UEFI command line to ASCII to pass to kernel.
+
+/*
+ * Get the number of UTF-8 bytes corresponding to an UTF-16 character.
+ * This overestimates for surrogates, but that is okay.
+ */
+static int efi_utf8_bytes(u16 c)
+{
+	return 1 + (c >= 0x80) + (c >= 0x800);
+}
+
+/*
+ * Convert an UTF-16 string, not necessarily null terminated, to UTF-8.
+ */
+static u8 *efi_utf8_to_utf16(u8 *dst, const u16 *src, int n)
+{
+	unsigned int c;
+
+	while (n--) {
+		c = *src++;
+		if (n && c >= 0xd800 && c <= 0xdbff &&
+		    *src >= 0xdc00 && *src <= 0xdfff) {
+			c = 0x10000 + ((c & 0x3ff) << 10) + (*src & 0x3ff);
+			src++;
+			n--;
+		}
+		if (c >= 0xd800 && c <= 0xdfff)
+			c = 0xfffd; /* Unmatched surrogate */
+		if (c < 0x80) {
+			*dst++ = c;
+			continue;
+		}
+		if (c < 0x800) {
+			*dst++ = 0xc0 + (c >> 6);
+			goto t1;
+		}
+		if (c < 0x10000) {
+			*dst++ = 0xe0 + (c >> 12);
+			goto t2;
+		}
+		*dst++ = 0xf0 + (c >> 18);
+		*dst++ = 0x80 + ((c >> 12) & 0x3f);
+	t2:
+		*dst++ = 0x80 + ((c >> 6) & 0x3f);
+	t1:
+		*dst++ = 0x80 + (c & 0x3f);
+	}
+
+	return dst;
+}
+
+/*
+ * Convert the unicode UEFI command line to ASCII to pass to kernel.
  * Size of memory allocated return in *cmd_line_len.
  * Returns NULL on error.
  */
-static char *efi_convert_cmdline_to_ascii(efi_system_table_t *sys_table_arg,
-				      efi_loaded_image_t *image,
-				      int *cmd_line_len)
+static char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
+				 efi_loaded_image_t *image,
+				 int *cmd_line_len)
 {
-	u16 *s2;
+	const u16 *s2;
 	u8 *s1 = NULL;
 	unsigned long cmdline_addr = 0;
 	int load_options_size = image->load_options_size / 2; /* ASCII */
-	void *options = image->load_options;
-	int options_size = 0;
+	const void *options = image->load_options;
+	int options_bytes = 0;	/* UTF-8 bytes */
+	int options_chars = 0;	/* UTF-16 chars */
 	efi_status_t status;
 	int i;
 	u16 zero = 0;
@@ -588,40 +640,39 @@ static char *efi_convert_cmdline_to_ascii(efi_system_table_t *sys_table_arg,
 	if (options) {
 		s2 = options;
 		while (*s2 && *s2 != '\n' && options_size < load_options_size) {
+			options_bytes += efi_utf8_bytes(*s2);
 			s2++;
-			options_size++;
 		}
+		options_chars = s2 - options;
 	}
 
-	if (options_size == 0) {
-		/* No command line options, so return empty string*/
-		options_size = 1;
+	if (!options_chars) {
+		/* No command line options, so return empty string */
 		options = &zero;
 	}
 
-	options_size++;  /* NUL termination */
+	options_bytes++;	/* NUL termination */
+
 #ifdef CONFIG_ARM
 	/* For ARM, allocate at a high address to avoid reserved
 	 * regions at low addresses that we don't know the specfics of
 	 * at the time we are processing the command line.
 	 */
-	status = efi_high_alloc(sys_table_arg, options_size, 0,
+	status = efi_high_alloc(sys_table_arg, options_bytes, 0,
 			    &cmdline_addr, 0xfffff000);
 #else
-	status = efi_low_alloc(sys_table_arg, options_size, 0,
+	status = efi_low_alloc(sys_table_arg, options_bytes, 0,
 			    &cmdline_addr);
 #endif
 	if (status != EFI_SUCCESS)
 		return NULL;
 
 	s1 = (u8 *)cmdline_addr;
-	s2 = (u16 *)options;
-
-	for (i = 0; i < options_size - 1; i++)
-		*s1++ = *s2++;
+	s2 = (const u16 *)options;
 
+	s1 = efi_utf16_to_utf8(s1, s2, options_chars);
 	*s1 = '\0';
 
-	*cmd_line_len = options_size;
+	*cmd_line_len = options_bytes;
 	return (char *)cmdline_addr;
 }
-- 
1.7.11.7


WARNING: multiple messages have this Message-ID (diff)
From: "H. Peter Anvin" <hpa@zytor.com>
To: Matt Fleming <matt@console-pimps.org>
Cc: Roy Franz <roy.franz@linaro.org>,
	Adam Borowski <kilobyte@angband.pl>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	linux-efi@vger.kernel.org, matt.fleming@intel.com,
	Leif Lindholm <leif.lindholm@linaro.org>,
	Mark Salter <msalter@redhat.com>
Subject: Re: [PATCH 09/17] Move unicode to ASCII conversion to shared function.
Date: Fri, 20 Sep 2013 10:00:12 -0500	[thread overview]
Message-ID: <523C62FC.8010907@zytor.com> (raw)
In-Reply-To: <20130920092713.GD4785@console-pimps.org>

[-- Attachment #1: Type: text/plain, Size: 634 bytes --]

On 09/20/2013 04:27 AM, Matt Fleming wrote:
> On Wed, 18 Sep, at 09:48:44PM, Roy Franz wrote:
>> Would it be acceptable to fix the naming/comments, and convert values
>> above 126 to '?'
>> in the current patchset, and address a more thorough fix in another patch set?
>> The ARM and ARM64 EFI stub patchsets that are mostly complete depend
>> on this one,
>> so getting this merged soon would be helpful.
> 
> Just fixing the function name and comments is enough for this patch
> series. Anything else should be separate.
> 

I just whipped up a patch to do proper UTF-16 to UTF-8 conversion.
Completely untested, of course.

	-hpa


[-- Attachment #2: 0001-efi-Handle-arbitrary-Unicode-characters.patch --]
[-- Type: text/x-patch, Size: 4897 bytes --]

>From 7d6cf630c1adbb9787a24c2994230373c2b20a8f Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Fri, 20 Sep 2013 09:55:39 -0500
Subject: [PATCH] efi: Handle arbitrary Unicode characters

Instead of truncating UTF-16 assuming all characters is ASCII,
properly convert it to UTF-8.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/boot/compressed/eboot.c       |  3 +-
 drivers/firmware/efi/efi-stub-helper.c | 89 ++++++++++++++++++++++++++--------
 2 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index bf56206..aacbe50 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -488,8 +488,7 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
 	hdr->type_of_loader = 0x21;
 
 	/* Convert unicode cmdline to ascii */
-	cmdline_ptr = efi_convert_cmdline_to_ascii(sys_table, image,
-						   &options_size);
+	cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
 	if (!cmdline_ptr)
 		goto fail;
 	hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c
index c91e757..7d4c1a4 100644
--- a/drivers/firmware/efi/efi-stub-helper.c
+++ b/drivers/firmware/efi/efi-stub-helper.c
@@ -567,20 +567,72 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
 
 	return status;
 }
-/* Convert the unicode UEFI command line to ASCII to pass to kernel.
+
+/*
+ * Get the number of UTF-8 bytes corresponding to an UTF-16 character.
+ * This overestimates for surrogates, but that is okay.
+ */
+static int efi_utf8_bytes(u16 c)
+{
+	return 1 + (c >= 0x80) + (c >= 0x800);
+}
+
+/*
+ * Convert an UTF-16 string, not necessarily null terminated, to UTF-8.
+ */
+static u8 *efi_utf8_to_utf16(u8 *dst, const u16 *src, int n)
+{
+	unsigned int c;
+
+	while (n--) {
+		c = *src++;
+		if (n && c >= 0xd800 && c <= 0xdbff &&
+		    *src >= 0xdc00 && *src <= 0xdfff) {
+			c = 0x10000 + ((c & 0x3ff) << 10) + (*src & 0x3ff);
+			src++;
+			n--;
+		}
+		if (c >= 0xd800 && c <= 0xdfff)
+			c = 0xfffd; /* Unmatched surrogate */
+		if (c < 0x80) {
+			*dst++ = c;
+			continue;
+		}
+		if (c < 0x800) {
+			*dst++ = 0xc0 + (c >> 6);
+			goto t1;
+		}
+		if (c < 0x10000) {
+			*dst++ = 0xe0 + (c >> 12);
+			goto t2;
+		}
+		*dst++ = 0xf0 + (c >> 18);
+		*dst++ = 0x80 + ((c >> 12) & 0x3f);
+	t2:
+		*dst++ = 0x80 + ((c >> 6) & 0x3f);
+	t1:
+		*dst++ = 0x80 + (c & 0x3f);
+	}
+
+	return dst;
+}
+
+/*
+ * Convert the unicode UEFI command line to ASCII to pass to kernel.
  * Size of memory allocated return in *cmd_line_len.
  * Returns NULL on error.
  */
-static char *efi_convert_cmdline_to_ascii(efi_system_table_t *sys_table_arg,
-				      efi_loaded_image_t *image,
-				      int *cmd_line_len)
+static char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
+				 efi_loaded_image_t *image,
+				 int *cmd_line_len)
 {
-	u16 *s2;
+	const u16 *s2;
 	u8 *s1 = NULL;
 	unsigned long cmdline_addr = 0;
 	int load_options_size = image->load_options_size / 2; /* ASCII */
-	void *options = image->load_options;
-	int options_size = 0;
+	const void *options = image->load_options;
+	int options_bytes = 0;	/* UTF-8 bytes */
+	int options_chars = 0;	/* UTF-16 chars */
 	efi_status_t status;
 	int i;
 	u16 zero = 0;
@@ -588,40 +640,39 @@ static char *efi_convert_cmdline_to_ascii(efi_system_table_t *sys_table_arg,
 	if (options) {
 		s2 = options;
 		while (*s2 && *s2 != '\n' && options_size < load_options_size) {
+			options_bytes += efi_utf8_bytes(*s2);
 			s2++;
-			options_size++;
 		}
+		options_chars = s2 - options;
 	}
 
-	if (options_size == 0) {
-		/* No command line options, so return empty string*/
-		options_size = 1;
+	if (!options_chars) {
+		/* No command line options, so return empty string */
 		options = &zero;
 	}
 
-	options_size++;  /* NUL termination */
+	options_bytes++;	/* NUL termination */
+
 #ifdef CONFIG_ARM
 	/* For ARM, allocate at a high address to avoid reserved
 	 * regions at low addresses that we don't know the specfics of
 	 * at the time we are processing the command line.
 	 */
-	status = efi_high_alloc(sys_table_arg, options_size, 0,
+	status = efi_high_alloc(sys_table_arg, options_bytes, 0,
 			    &cmdline_addr, 0xfffff000);
 #else
-	status = efi_low_alloc(sys_table_arg, options_size, 0,
+	status = efi_low_alloc(sys_table_arg, options_bytes, 0,
 			    &cmdline_addr);
 #endif
 	if (status != EFI_SUCCESS)
 		return NULL;
 
 	s1 = (u8 *)cmdline_addr;
-	s2 = (u16 *)options;
-
-	for (i = 0; i < options_size - 1; i++)
-		*s1++ = *s2++;
+	s2 = (const u16 *)options;
 
+	s1 = efi_utf16_to_utf8(s1, s2, options_chars);
 	*s1 = '\0';
 
-	*cmd_line_len = options_size;
+	*cmd_line_len = options_bytes;
 	return (char *)cmdline_addr;
 }
-- 
1.7.11.7


  parent reply	other threads:[~2013-09-20 15:00 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-17  4:11 [PATCH V4 00/17] ARM EFI stub common code Roy Franz
2013-09-17  4:11 ` Roy Franz
2013-09-17  4:11 ` [PATCH 02/17] Add proper definitions for some EFI function pointers Roy Franz
2013-09-17  4:11 ` [PATCH 03/17] Move common EFI stub code from x86 arch code to common location Roy Franz
2013-09-17  4:11 ` [PATCH 04/17] Add system table pointer argument to shared functions Roy Franz
2013-09-17  4:11 ` [PATCH 05/17] Rename memory allocation/free functions Roy Franz
2013-09-17  4:11 ` [PATCH 06/17] Enforce minimum alignment of 1 page on allocations Roy Franz
2013-09-17  4:11 ` [PATCH 08/17] Generalize relocate_kernel() for use by other architectures Roy Franz
     [not found]   ` <1379391093-27948-9-git-send-email-roy.franz-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
2013-09-18 12:12     ` Matt Fleming
2013-09-18 12:12       ` Matt Fleming
     [not found]       ` <20130918121240.GJ3409-HNK1S37rvNbeXh+fF434Mdi2O/JbrIOy@public.gmane.org>
2013-09-18 16:31         ` Roy Franz
2013-09-18 16:31           ` Roy Franz
2013-09-17  4:11 ` [PATCH 09/17] Move unicode to ASCII conversion to shared function Roy Franz
     [not found]   ` <1379391093-27948-10-git-send-email-roy.franz-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
2013-09-19  3:44     ` Adam Borowski
2013-09-19  3:44       ` Adam Borowski
     [not found]       ` <20130919034406.GA26385-b9QjgO8OEXPVItvQsEIGlw@public.gmane.org>
2013-09-19  3:46         ` H. Peter Anvin
2013-09-19  3:46           ` H. Peter Anvin
2013-09-19  4:48         ` Roy Franz
2013-09-19  4:48           ` Roy Franz
     [not found]           ` <CAFECyb-tTaCEqUHUh23MaJf-P42ZpodFKeNG=kE+vmmi-gyKrw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2013-09-19 23:02             ` Adam Borowski
2013-09-19 23:02               ` Adam Borowski
2013-09-20  9:30               ` Matt Fleming
2013-09-20  9:27             ` Matt Fleming
2013-09-20  9:27               ` Matt Fleming
     [not found]               ` <20130920092713.GD4785-HNK1S37rvNbeXh+fF434Mdi2O/JbrIOy@public.gmane.org>
2013-09-20 15:00                 ` H. Peter Anvin [this message]
2013-09-20 15:00                   ` H. Peter Anvin
     [not found]                   ` <523C62FC.8010907-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org>
2013-09-21 21:31                     ` Roy Franz
2013-09-21 21:31                       ` Roy Franz
2013-09-17  4:11 ` [PATCH 10/17] Rename __get_map() to efi_get_memory_map() Roy Franz
2013-09-17  4:11 ` [PATCH 11/17] generalize efi_get_memory_map() Roy Franz
2013-09-17  4:11 ` [PATCH 12/17] use efi_get_memory_map() to get final map for x86 Roy Franz
     [not found] ` <1379391093-27948-1-git-send-email-roy.franz-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
2013-09-17  4:11   ` [PATCH 01/17] EFI stub documentation updates Roy Franz
2013-09-17  4:11     ` Roy Franz
2013-09-17  4:11   ` [PATCH 07/17] Move relocate_kernel() to shared file Roy Franz
2013-09-17  4:11     ` Roy Franz
2013-09-17  4:11   ` [PATCH 13/17] Allow efi_free() to be called with size of 0, and do nothing in that case Roy Franz
2013-09-17  4:11     ` Roy Franz
2013-09-17  4:11   ` [PATCH 16/17] Fix types in EFI calls to match EFI function definitions Roy Franz
2013-09-17  4:11     ` Roy Franz
2013-09-17  4:11   ` [PATCH 17/17] resolve warnings found on ARM compile Roy Franz
2013-09-17  4:11     ` Roy Franz
2013-09-18 13:21   ` [PATCH V4 00/17] ARM EFI stub common code Matt Fleming
2013-09-18 13:21     ` Matt Fleming
2013-09-20 14:32   ` H. Peter Anvin
2013-09-20 14:32     ` H. Peter Anvin
2013-09-17  4:11 ` [PATCH 14/17] Generalize handle_ramdisks() and rename to handle_cmdline_files() Roy Franz
2013-09-17  4:11 ` [PATCH 15/17] Renames in handle_cmdline_files() to complete generalization Roy Franz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=523C62FC.8010907@zytor.com \
    --to=hpa-ymnouzjc4hwavxtiumwx3w@public.gmane.org \
    --cc=kilobyte-b9QjgO8OEXPVItvQsEIGlw@public.gmane.org \
    --cc=leif.lindholm-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org \
    --cc=linux-efi-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=matt-HNK1S37rvNbeXh+fF434Mdi2O/JbrIOy@public.gmane.org \
    --cc=matt.fleming-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=msalter-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=roy.franz-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.