qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Markus Armbruster <armbru@redhat.com>
To: qemu-devel@nongnu.org
Cc: marcandre.lureau@redhat.com, mdroth@linux.vnet.ibm.com,
	eblake@redhat.com
Subject: [Qemu-devel] [PATCH v3 29/58] json: Fix \uXXXX for surrogate pairs
Date: Thu, 23 Aug 2018 18:39:56 +0200	[thread overview]
Message-ID: <20180823164025.12553-30-armbru@redhat.com> (raw)
In-Reply-To: <20180823164025.12553-1-armbru@redhat.com>

The JSON parser treats each half of a surrogate pair as an unpaired
surrogate.  Fix it to recognize surrogate pairs.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 qobject/json-parser.c | 60 ++++++++++++++++++++++++++++---------------
 tests/check-qjson.c   |  3 +--
 2 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index e49da192fe..73e6ad7458 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -64,16 +64,27 @@ static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
     error_setg(&ctxt->err, "JSON parse error, %s", message);
 }
 
-static int hex2decimal(char ch)
+static int cvt4hex(const char *s)
 {
-    if (ch >= '0' && ch <= '9') {
-        return (ch - '0');
-    } else if (ch >= 'a' && ch <= 'f') {
-        return 10 + (ch - 'a');
-    } else if (ch >= 'A' && ch <= 'F') {
-        return 10 + (ch - 'A');
+    int cp, i;
+
+    cp = 0;
+    for (i = 0; i < 4; i++) {
+        if (!qemu_isxdigit(s[i])) {
+            return -1;
+        }
+        cp <<= 4;
+        if (s[i] >= '0' && s[i] <= '9') {
+            cp |= s[i] - '0';
+        } else if (s[i] >= 'a' && s[i] <= 'f') {
+            cp |= 10 + s[i] - 'a';
+        } else if (s[i] >= 'A' && s[i] <= 'F') {
+            cp |= 10 + s[i] - 'A';
+        } else {
+            return -1;
+        }
     }
-    abort();
+    return cp;
 }
 
 /**
@@ -115,7 +126,8 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
     const char *ptr = token->str;
     QString *str;
     char quote;
-    int cp, i;
+    const char *beg;
+    int cp, trailing;
     char *end;
     ssize_t len;
     char utf8_buf[5];
@@ -127,7 +139,7 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
     while (*ptr != quote) {
         assert(*ptr);
         if (*ptr == '\\') {
-            ptr++;
+            beg = ptr++;
             switch (*ptr++) {
             case '"':
                 qstring_append_chr(str, '"');
@@ -157,22 +169,28 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
                 qstring_append_chr(str, '\t');
                 break;
             case 'u':
-                cp = 0;
-                for (i = 0; i < 4; i++) {
-                    if (!qemu_isxdigit(*ptr)) {
-                        parse_error(ctxt, token,
-                                    "invalid hex escape sequence in string");
-                        goto out;
+                cp = cvt4hex(ptr);
+                ptr += 4;
+
+                /* handle surrogate pairs */
+                if (cp >= 0xD800 && cp <= 0xDBFF
+                    && ptr[0] == '\\' && ptr[1] == 'u') {
+                    /* leading surrogate followed by \u */
+                    cp = 0x10000 + ((cp & 0x3FF) << 10);
+                    trailing = cvt4hex(ptr + 2);
+                    if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
+                        /* followed by trailing surrogate */
+                        cp |= trailing & 0x3FF;
+                        ptr += 6;
+                    } else {
+                        cp = -1; /* invalid */
                     }
-                    cp <<= 4;
-                    cp |= hex2decimal(*ptr);
-                    ptr++;
                 }
 
                 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
                     parse_error(ctxt, token,
-                                "\\u%.4s is not a valid Unicode character",
-                                ptr - 3);
+                                "%.*s is not a valid Unicode character",
+                                (int)(ptr - beg), beg);
                     goto out;
                 }
                 qstring_append(str, utf8_buf);
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 4abb5847ad..343f8af36a 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -63,8 +63,7 @@ static void escaped_string(void)
         { "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" },
         { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
         { "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */
-          /* bug: want \xF0\x9D\x84\x9E */
-          NULL },
+          "quadruple byte utf-8 \xF0\x9D\x84\x9E" },
         { "\\", NULL },
         { "\\z", NULL },
         { "\\ux", NULL },
-- 
2.17.1

  parent reply	other threads:[~2018-08-23 16:42 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-08-23 16:39 [Qemu-devel] [PATCH v3 00/58] json: Fixes, error reporting improvements, cleanups Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 01/58] docs/interop/qmp-spec: How to force known good parser state Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 02/58] check-qjson: Cover multiple JSON objects in same string Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 03/58] check-qjson: Cover blank and lexically erroneous input Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 04/58] check-qjson: Cover whitespace more thoroughly Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 05/58] qmp-cmd-test: Split off qmp-test Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 06/58] qmp-test: Cover syntax and lexical errors Markus Armbruster
2018-08-23 17:05   ` Eric Blake
2018-08-24 19:32     ` Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 07/58] test-qga: Clean up how we test QGA synchronization Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 08/58] check-qjson: Cover escaped characters more thoroughly, part 1 Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 09/58] check-qjson: Streamline escaped_string()'s test strings Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 10/58] check-qjson: Cover escaped characters more thoroughly, part 2 Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 11/58] check-qjson: Consolidate partly redundant string tests Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 12/58] check-qjson: Cover UTF-8 in single quoted strings Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 13/58] check-qjson: Simplify utf8_string() Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 14/58] check-qjson: Fix utf8_string() to test all invalid sequences Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 15/58] check-qjson qmp-test: Cover control characters more thoroughly Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 16/58] check-qjson: Cover interpolation " Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 17/58] json: Fix lexer to include the bad character in JSON_ERROR token Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 18/58] json: Reject unescaped control characters Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 19/58] json: Revamp lexer documentation Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 20/58] json: Tighten and simplify qstring_from_escaped_str()'s loop Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 21/58] check-qjson: Document we expect invalid UTF-8 to be rejected Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 22/58] json: Reject invalid UTF-8 sequences Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 23/58] json: Report first rather than last parse error Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 24/58] json: Leave rejecting invalid UTF-8 to parser Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 25/58] json: Accept overlong \xC0\x80 as U+0000 ("modified UTF-8") Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 26/58] json: Leave rejecting invalid escape sequences to parser Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 27/58] json: Simplify parse_string() Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 28/58] json: Reject invalid \uXXXX, fix \u0000 Markus Armbruster
2018-08-23 16:39 ` Markus Armbruster [this message]
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 30/58] check-qjson: Fix and enable utf8_string()'s disabled part Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 31/58] json: remove useless return value from lexer/parser Markus Armbruster
2018-08-23 16:39 ` [Qemu-devel] [PATCH v3 32/58] json-parser: simplify and avoid JSONParserContext allocation Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 33/58] json: Have lexer call streamer directly Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 34/58] json: Redesign the callback to consume JSON values Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 35/58] json: Don't pass null @tokens to json_parser_parse() Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 36/58] json: Don't create JSON_ERROR tokens that won't be used Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 37/58] json: Rename token JSON_ESCAPE & friends to JSON_INTERP Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 38/58] json: Treat unwanted interpolation as lexical error Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 39/58] json: Pass lexical errors and limit violations to callback Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 40/58] json: Leave rejecting invalid interpolation to parser Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 41/58] json: Replace %I64d, %I64u by %PRId64, %PRIu64 Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 42/58] json: Improve names of lexer states related to numbers Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 43/58] qjson: Fix qobject_from_json() & friends for multiple values Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 44/58] json: Fix latent parser aborts at end of input Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 45/58] json: Fix streamer not to ignore trailing unterminated structures Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 46/58] json: Assert json_parser_parse() consumes all tokens on success Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 47/58] qjson: Have qobject_from_json() & friends reject empty and blank Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 48/58] json: Enforce token count and size limits more tightly Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 49/58] json: Streamline json_message_process_token() Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 50/58] json: Unbox tokens queue in JSONMessageParser Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 51/58] json: Make JSONToken opaque outside json-parser.c Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 52/58] qobject: Drop superfluous includes of qemu-common.h Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 53/58] json: Clean up headers Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 54/58] tests/drive_del-test: Fix harmless JSON interpolation bug Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 55/58] json: Keep interpolation state in JSONParserContext Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 56/58] json: Improve safety of qobject_from_jsonf_nofail() & friends Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 57/58] json: Support %% in JSON strings when interpolating Markus Armbruster
2018-08-23 16:40 ` [Qemu-devel] [PATCH v3 58/58] json: Update references to RFC 7159 to RFC 8259 Markus Armbruster
2018-08-23 17:39   ` Eric Blake

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180823164025.12553-30-armbru@redhat.com \
    --to=armbru@redhat.com \
    --cc=eblake@redhat.com \
    --cc=marcandre.lureau@redhat.com \
    --cc=mdroth@linux.vnet.ibm.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).