diff options
author | default <nobody@localhost> | 2022-09-20 07:15:39 +0200 |
---|---|---|
committer | default <nobody@localhost> | 2022-09-20 07:15:39 +0200 |
commit | b97f4c7a90125812b6d17d5b9f2450708e53a4ba (patch) | |
tree | 9d4b37157b80c315b66aa97637f948715c7f476a /xs_json.h | |
parent | 5e438f8353db63c4dca5037664e10730144b0922 (diff) |
Improved Unicode surrogate pairs parsing in xs_json.
Diffstat (limited to 'xs_json.h')
-rw-r--r-- | xs_json.h | 27 |
1 files changed, 21 insertions, 6 deletions
```diff
@@ -229,7 +229,7 @@ d_char *_xs_json_loads_lexer(const char **json, js_type *t)
 
         while ((c = *s) != '"' && c != '\0') {
             char tmp[5];
-            int i;
+            int cp, i;
 
             if (c == '\\') {
                 s++;
@@ -240,15 +240,30 @@ d_char *_xs_json_loads_lexer(const char **json, js_type *t)
                 case 't': c = '\t'; break;
                 case 'u': /* Unicode codepoint as an hex char */
                     s++;
-                    tmp[0] = (char)*s; s++;
-                    tmp[1] = (char)*s; s++;
-                    tmp[2] = (char)*s; s++;
-                    tmp[3] = (char)*s;
+                    memcpy(tmp, s, 4);
+                    s += 3;
                     tmp[4] = '\0';
+                    xs_debug();
 
                     sscanf(tmp, "%04x", &i);
 
-                    v = xs_utf8_enc(v, i);
+                    if (i >= 0xd800 && i <= 0xdfff) {
+                        /* it's a surrogate pair */
+                        cp = (i & 0x3ff) << 10;
+
+                        /* skip to the next value */
+                        s += 3;
+                        memcpy(tmp, s, 4);
+                        s += 3;
+
+                        sscanf(tmp, "%04x", &i);
+                        cp |= (i & 0x3ff);
+                        cp += 0x10000;
+                    }
+                    else
+                        cp = i;
+
+                    v = xs_utf8_enc(v, cp);
                     c = '\0';
                     break;
 
```