summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordefault <nobody@localhost>2023-11-17 03:51:04 +0100
committerdefault <nobody@localhost>2023-11-17 03:51:04 +0100
commitacf3cdcf80da7c3443e202a02d4b626c13e9e8dd (patch)
treef9498dfd051d469d9e76c4b47c5704bfc9a79e48
parent57ab8df0076ba8d57dce16fdd3a5e38c744e1a04 (diff)
Backport from xs.
-rw-r--r--xs.h7
-rw-r--r--xs_json.h12
-rw-r--r--xs_unicode.h29
-rw-r--r--xs_version.h2
4 files changed, 39 insertions, 11 deletions
diff --git a/xs.h b/xs.h
index 7b85dcb..c0857bc 100644
--- a/xs.h
+++ b/xs.h
@@ -1180,6 +1180,8 @@ void *xs_memmem(const char *haystack, int h_size, const char *needle, int n_size
/** hex **/
+static const char xs_hex_digits[] = "0123456789abcdef"; /* read-only table: nibble value -> lowercase hex digit */
+
xs_str *xs_hex_enc(const xs_val *data, int size)
/* returns an hexdump of data */
{
@@ -1190,8 +1192,9 @@ xs_str *xs_hex_enc(const xs_val *data, int size)
p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1));
for (n = 0; n < size; n++) {
- snprintf(p, 3, "%02x", (unsigned char)data[n]);
- p += 2;
+ *p++ = xs_hex_digits[*data >> 4 & 0xf];
+ *p++ = xs_hex_digits[*data & 0xf];
+ data++;
}
*p = '\0';
diff --git a/xs_json.h b/xs_json.h
index e9dc052..03f7903 100644
--- a/xs_json.h
+++ b/xs_json.h
@@ -248,24 +248,20 @@ static xs_val *_xs_json_load_lexer(FILE *f, js_type *t)
break;
}
- if (cp >= 0xd800 && cp <= 0xdfff) {
- /* it's a surrogate pair */
- cp = (cp & 0x3ff) << 10;
-
+ if (xs_is_surrogate(cp)) {
/* \u must follow */
if (fgetc(f) != '\\' || fgetc(f) != 'u') {
*t = JS_ERROR;
break;
}
- unsigned int i;
- if (fscanf(f, "%04x", &i) != 1) {
+ unsigned int p2;
+ if (fscanf(f, "%04x", &p2) != 1) {
*t = JS_ERROR;
break;
}
- cp |= (i & 0x3ff);
- cp += 0x10000;
+ cp = xs_surrogate_dec(cp, p2);
}
/* replace dangerous control codes with their visual representations */
diff --git a/xs_unicode.h b/xs_unicode.h
index c7d6190..f5880f0 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -8,6 +8,9 @@
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
unsigned int xs_utf8_dec(char **str);
int xs_unicode_width(unsigned int cpoint);
+ int xs_is_surrogate(unsigned int cpoint);
+ unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2);
+ unsigned int xs_surrogate_enc(unsigned int cpoint);
unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
unsigned int *_xs_unicode_lower_search(unsigned int cpoint);
#define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint))
@@ -138,6 +141,32 @@ int xs_unicode_width(unsigned int cpoint)
}
+/** surrogate pairs **/
+
+int xs_is_surrogate(unsigned int cpoint)
+/* checks if cpoint lies in the surrogate range 0xd800-0xdfff (matches either half of a pair, not only the first) */
+{
+ return cpoint >= 0xd800 && cpoint <= 0xdfff;
+}
+
+
+unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2)
+/* "decodes" a surrogate pair (p1 first, p2 second) into a codepoint; 0x10000 is added, not OR'ed, as the shifted p1 bits can reach bit 16 */
+{
+ return 0x10000 + (((p1 & 0x3ff) << 10) | (p2 & 0x3ff));
+}
+
+
+unsigned int xs_surrogate_enc(unsigned int cpoint)
+/* "encodes" a Unicode codepoint into a surrogate pair (p1 in the upper 16 bits of the result, p2 in the lower 16); presumably cpoint is >= 0x10000 - TODO confirm callers */
+{
+ unsigned int p1 = 0xd7c0 + (cpoint >> 10); /* 0xd7c0 == 0xd800 - (0x10000 >> 10) */
+ unsigned int p2 = 0xdc00 + (cpoint & 0x3ff); /* low 10 bits of cpoint */
+
+ return (p1 << 16) | p2;
+}
+
+
#ifdef _XS_UNICODE_TBL_H
/* include xs_unicode_tbl.h before this one to use these functions */
diff --git a/xs_version.h b/xs_version.h
index d888d29..42dc7d2 100644
--- a/xs_version.h
+++ b/xs_version.h
@@ -1 +1 @@
-/* 40d63c59610c642d1c8b2e2b94bbf5cdde69ad6a */
+/* 0932615dfe85e5d8544c4b2052eb66f3a430eb8c */