linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Roy Franz <roy.franz@linaro.org>
To: linux-kernel@vger.kernel.org, linux-efi@vger.kernel.org,
	matt.fleming@intel.com
Cc: leif.lindholm@linaro.org, grant.likely@linaro.org,
	msalter@redhat.com, "H. Peter Anvin" <hpa@zytor.com>,
	Roy Franz <roy.franz@linaro.org>
Subject: [PATCH 10/18] Do proper conversion from UTF-16 to UTF-8
Date: Sun, 22 Sep 2013 15:45:34 -0700	[thread overview]
Message-ID: <1379889942-3135-11-git-send-email-roy.franz@linaro.org> (raw)
In-Reply-To: <1379889942-3135-1-git-send-email-roy.franz@linaro.org>

From: "H. Peter Anvin" <hpa@zytor.com>

Improve the conversion of the UTF-16 EFI command line
to UTF-8 for passing to the kernel.

Signed-off-by: Roy Franz <roy.franz@linaro.org>
---
 arch/x86/boot/compressed/eboot.c       |    3 +-
 drivers/firmware/efi/efi-stub-helper.c |   92 ++++++++++++++++++++++++--------
 2 files changed, 72 insertions(+), 23 deletions(-)

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 5e708c0..4723dc89 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -486,8 +486,7 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
 	hdr->type_of_loader = 0x21;
 
 	/* Convert unicode cmdline to ascii */
-	cmdline_ptr = efi_convert_cmdline_to_ascii(sys_table, image,
-						   &options_size);
+	cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
 	if (!cmdline_ptr)
 		goto fail;
 	hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c
index 335d17d..8331892 100644
--- a/drivers/firmware/efi/efi-stub-helper.c
+++ b/drivers/firmware/efi/efi-stub-helper.c
@@ -548,61 +548,111 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
 
 	return status;
 }
-/* Convert the unicode UEFI command line to ASCII to pass to kernel.
+
+/*
+ * Get the number of UTF-8 bytes corresponding to a UTF-16 character.
+ * This overestimates for surrogates, but that is okay.
+ */
+static int efi_utf8_bytes(u16 c)
+{
+	return 1 + (c >= 0x80) + (c >= 0x800);
+}
+
+/*
+ * Convert a UTF-16 string, not necessarily null terminated, to UTF-8.
+ */
+static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
+{
+	unsigned int c;
+
+	while (n--) {
+		c = *src++;
+		if (n && c >= 0xd800 && c <= 0xdbff &&
+		    *src >= 0xdc00 && *src <= 0xdfff) {
+			c = 0x10000 + ((c & 0x3ff) << 10) + (*src & 0x3ff);
+			src++;
+			n--;
+		}
+		if (c >= 0xd800 && c <= 0xdfff)
+			c = 0xfffd; /* Unmatched surrogate */
+		if (c < 0x80) {
+			*dst++ = c;
+			continue;
+		}
+		if (c < 0x800) {
+			*dst++ = 0xc0 + (c >> 6);
+			goto t1;
+		}
+		if (c < 0x10000) {
+			*dst++ = 0xe0 + (c >> 12);
+			goto t2;
+		}
+		*dst++ = 0xf0 + (c >> 18);
+		*dst++ = 0x80 + ((c >> 12) & 0x3f);
+t2:
+		*dst++ = 0x80 + ((c >> 6) & 0x3f);
+t1:
+		*dst++ = 0x80 + (c & 0x3f);
+	}
+
+	return dst;
+}
+
+/*
+ * Convert the UTF-16 UEFI command line to UTF-8 to pass to kernel.
  * Size of memory allocated return in *cmd_line_len.
  * Returns NULL on error.
  */
-static char *efi_convert_cmdline_to_ascii(efi_system_table_t *sys_table_arg,
-				      efi_loaded_image_t *image,
-				      int *cmd_line_len)
+static char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
+				 efi_loaded_image_t *image,
+				 int *cmd_line_len)
 {
-	u16 *s2;
+	const u16 *s2;
 	u8 *s1 = NULL;
 	unsigned long cmdline_addr = 0;
 	int load_options_size = image->load_options_size / 2; /* ASCII */
-	void *options = image->load_options;
-	int options_size = 0;
+	const u16 *options = image->load_options;
+	int options_bytes = 0;	/* UTF-8 bytes */
+	int options_chars = 0;	/* UTF-16 chars */
 	efi_status_t status;
-	int i;
 	u16 zero = 0;
 
 	if (options) {
 		s2 = options;
-		while (*s2 && *s2 != '\n' && options_size < load_options_size) {
+		while (*s2 && *s2 != '\n' && options_bytes < load_options_size) {
+			options_bytes += efi_utf8_bytes(*s2);
 			s2++;
-			options_size++;
 		}
+		options_chars = s2 - options;
 	}
 
-	if (options_size == 0) {
-		/* No command line options, so return empty string*/
-		options_size = 1;
+	if (!options_chars) {
+		/* No command line options, so return empty string */
 		options = &zero;
 	}
 
-	options_size++;  /* NUL termination */
+	options_bytes++;	/* NUL termination */
+
 #ifdef CONFIG_ARM
 	/* For ARM, allocate at a high address to avoid reserved
 	 * regions at low addresses that we don't know the specfics of
 	 * at the time we are processing the command line.
 	 */
-	status = efi_high_alloc(sys_table_arg, options_size, 0,
+	status = efi_high_alloc(sys_table_arg, options_bytes, 0,
 			    &cmdline_addr, 0xfffff000);
 #else
-	status = efi_low_alloc(sys_table_arg, options_size, 0,
+	status = efi_low_alloc(sys_table_arg, options_bytes, 0,
 			    &cmdline_addr);
 #endif
 	if (status != EFI_SUCCESS)
 		return NULL;
 
 	s1 = (u8 *)cmdline_addr;
-	s2 = (u16 *)options;
-
-	for (i = 0; i < options_size - 1; i++)
-		*s1++ = *s2++;
+	s2 = (const u16 *)options;
 
+	s1 = efi_utf16_to_utf8(s1, s2, options_chars);
 	*s1 = '\0';
 
-	*cmd_line_len = options_size;
+	*cmd_line_len = options_bytes;
 	return (char *)cmdline_addr;
 }
-- 
1.7.10.4


  parent reply	other threads:[~2013-09-22 22:47 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-22 22:45 [PATCH V5 00/18] ARM EFI stub common code Roy Franz
2013-09-22 22:45 ` [PATCH 01/18] EFI stub documentation updates Roy Franz
2013-09-22 22:45 ` [PATCH 02/18] Add proper definitions for some EFI function pointers Roy Franz
2013-09-22 22:45 ` [PATCH 03/18] Move common EFI stub code from x86 arch code to common location Roy Franz
2013-09-22 22:45 ` [PATCH 04/18] Add system table pointer argument to shared functions Roy Franz
2013-09-22 22:45 ` [PATCH 05/18] Rename memory allocation/free functions Roy Franz
2013-09-22 22:45 ` [PATCH 06/18] Enforce minimum alignment of 1 page on allocations Roy Franz
2013-09-25 12:01   ` Matt Fleming
2013-09-22 22:45 ` [PATCH 07/18] Move relocate_kernel() to shared file Roy Franz
2013-09-22 22:45 ` [PATCH 08/18] Generalize relocate_kernel() for use by other architectures Roy Franz
2013-09-25 12:01   ` Matt Fleming
2013-09-22 22:45 ` [PATCH 09/18] Move unicode to ASCII conversion to shared function Roy Franz
2013-09-25 12:01   ` Matt Fleming
2013-09-22 22:45 ` Roy Franz [this message]
2013-09-22 22:54   ` [PATCH 10/18] Do proper conversion from UTF-16 to UTF-8 H. Peter Anvin
2013-09-22 23:07     ` Roy Franz
2013-09-23  0:24       ` H. Peter Anvin
2013-09-25 12:01         ` Matt Fleming
2013-09-25 14:15           ` Roy Franz
2013-09-22 22:45 ` [PATCH 11/18] Rename __get_map() to efi_get_memory_map() Roy Franz
2013-09-22 22:45 ` [PATCH 12/18] generalize efi_get_memory_map() Roy Franz
2013-09-22 22:45 ` [PATCH 13/18] use efi_get_memory_map() to get final map for x86 Roy Franz
2013-09-22 22:45 ` [PATCH 14/18] Allow efi_free() to be called with size of 0, and do nothing in that case Roy Franz
2013-09-22 22:45 ` [PATCH 15/18] Generalize handle_ramdisks() and rename to handle_cmdline_files() Roy Franz
2013-09-22 22:45 ` [PATCH 16/18] Renames in handle_cmdline_files() to complete generalization Roy Franz
2013-09-22 22:45 ` [PATCH 17/18] Fix types in EFI calls to match EFI function definitions Roy Franz
2013-09-22 22:45 ` [PATCH 18/18] resolve warnings found on ARM compile Roy Franz
2013-09-25 12:11 ` [PATCH V5 00/18] ARM EFI stub common code Matt Fleming
2013-09-25 12:21   ` Matt Fleming

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1379889942-3135-11-git-send-email-roy.franz@linaro.org \
    --to=roy.franz@linaro.org \
    --cc=grant.likely@linaro.org \
    --cc=hpa@zytor.com \
    --cc=leif.lindholm@linaro.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt.fleming@intel.com \
    --cc=msalter@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).