summary | refs | log | tree | commit | diff
path: root/xs_json.h
diff options
context:
space:
mode:
author default <nobody@localhost> 2022-09-20 07:15:39 +0200
committer default <nobody@localhost> 2022-09-20 07:15:39 +0200
commit b97f4c7a90125812b6d17d5b9f2450708e53a4ba (patch)
tree 9d4b37157b80c315b66aa97637f948715c7f476a /xs_json.h
parent 5e438f8353db63c4dca5037664e10730144b0922 (diff)
Improved Unicode surrogate pairs parsing in xs_json.
Diffstat (limited to 'xs_json.h')
-rw-r--r-- xs_json.h 27
1 files changed, 21 insertions, 6 deletions
diff --git a/xs_json.h b/xs_json.h
index f6eaa2f..75a4e4d 100644
--- a/xs_json.h
+++ b/xs_json.h
@@ -229,7 +229,7 @@ d_char *_xs_json_loads_lexer(const char **json, js_type *t)
while ((c = *s) != '"' && c != '\0') {
char tmp[5];
- int i;
+ int cp, i;
if (c == '\\') {
s++;
@@ -240,15 +240,30 @@ d_char *_xs_json_loads_lexer(const char **json, js_type *t)
case 't': c = '\t'; break;
case 'u': /* Unicode codepoint as an hex char */
s++;
- tmp[0] = (char)*s; s++;
- tmp[1] = (char)*s; s++;
- tmp[2] = (char)*s; s++;
- tmp[3] = (char)*s;
+ memcpy(tmp, s, 4);
+ s += 3;
tmp[4] = '\0';
+ xs_debug();
sscanf(tmp, "%04x", &i);
- v = xs_utf8_enc(v, i);
+ if (i >= 0xd800 && i <= 0xdfff) {
+ /* it's a surrogate pair */
+ cp = (i & 0x3ff) << 10;
+
+ /* skip to the next value */
+ s += 3;
+ memcpy(tmp, s, 4);
+ s += 3;
+
+ sscanf(tmp, "%04x", &i);
+ cp |= (i & 0x3ff);
+ cp += 0x10000;
+ }
+ else
+ cp = i;
+
+ v = xs_utf8_enc(v, cp);
c = '\0';
break;