summaryrefslogtreecommitdiff
path: root/xs_unicode.h
diff options
context:
space:
mode:
authordefault <nobody@localhost>2023-05-30 19:49:30 +0200
committerdefault <nobody@localhost>2023-05-30 19:49:30 +0200
commitca2e0fcd89599819b0a808aff5a8125df26c36d5 (patch)
tree2348818fab7939f3939fd98371b4c204442947d2 /xs_unicode.h
parent6c2ca0d40aab36e9f25a7145b9fba47286e74850 (diff)
Backport from xs.
Diffstat (limited to 'xs_unicode.h')
-rw-r--r--xs_unicode.h87
1 files changed, 68 insertions, 19 deletions
diff --git a/xs_unicode.h b/xs_unicode.h
index 6f78d58..2f081ad 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -5,42 +5,91 @@
#define _XS_UNICODE_H
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
+ char *xs_utf8_dec(const char *str, unsigned int *cpoint);
#ifdef XS_IMPLEMENTATION
-/** utf-8 **/
+
+char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
+/* Encodes a Unicode codepoint as UTF-8 into buf and returns a pointer
+   just past the last byte written. Between 1 and 4 bytes are stored,
+   chosen by the magnitude of cpoint; no NUL terminator is written.
+   The nested else branches share their trailing continuation bytes:
+   longer forms emit their extra bytes first and then fall through to
+   the stores used by the shorter forms.
+   NOTE(review): every value >= 0x10000 takes the 4-byte branch, so a
+   cpoint >= 0x200000 makes the lead byte 0xf0 | (cpoint >> 18) spill
+   past its 3 payload bits; the range is not validated here. */
+{
+ unsigned char *p = (unsigned char *)buf;
+
+ if (cpoint < 0x80) /* 1 byte char: stored as is */
+ *p++ = cpoint & 0xff;
+ else {
+ if (cpoint < 0x800) /* 2 byte char: lead byte holds bits 6 and up */
+ *p++ = 0xc0 | (cpoint >> 6);
+ else {
+ if (cpoint < 0x10000) /* 3 byte char: lead byte holds bits 12 and up */
+ *p++ = 0xe0 | (cpoint >> 12);
+ else { /* 4 byte char: lead byte holds bits 18 and up */
+ *p++ = 0xf0 | (cpoint >> 18);
+ *p++ = 0x80 | ((cpoint >> 12) & 0x3f); /* continuation: bits 12-17 */
+ }
+
+ *p++ = 0x80 | ((cpoint >> 6) & 0x3f); /* continuation: bits 6-11 (3 and 4 byte forms) */
+ }
+
+ *p++ = 0x80 | (cpoint & 0x3f); /* last continuation: bits 0-5 (all multibyte forms) */
+ }
+
+ return (char *)p;
+}
+
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
-/* encodes an Unicode codepoint to utf8 */
+/* Encodes a Unicode codepoint as UTF-8 into a local 4-byte buffer via
+   _xs_utf8_enc() and hands those bytes, together with str, to
+   xs_append_m(); the value xs_append_m() returns is returned as is.
+   Presumably that is str with the encoded bytes appended; xs_append_m
+   is defined outside this file, so confirm ownership rules there. */
{
- unsigned char tmp[4];
- int n = 0;
+ char tmp[4], *p;
+
+ p = _xs_utf8_enc(tmp, cpoint);
- if (cpoint < 0x80)
- tmp[n++] = cpoint & 0xff;
+ return xs_append_m(str, tmp, p - tmp); /* p - tmp is the number of bytes encoded */
+}
+
+
+char *xs_utf8_dec(const char *str, unsigned int *cpoint)
+/* Decodes one UTF-8 sequence starting at str, stores the codepoint in
+   *cpoint and returns a pointer to the byte after the consumed input.
+   The lead byte selects the length (1 to 4 bytes) and supplies the
+   high bits; each following byte must match 10xxxxxx and adds 6 bits.
+   If a continuation byte does not match, *cpoint is set to 0xfffd and
+   decoding stops with the offending byte left unconsumed.
+   NOTE(review): a lead byte matching none of the four patterns (a
+   stray continuation byte 0x80-0xbf, or 0xf8-0xff) is consumed without
+   *cpoint ever being written, so the caller seems to get an unset
+   value; confirm whether 0xfffd should be stored in that case too.
+   NOTE(review): overlong forms and values above 0x10ffff are not
+   rejected, and the lead byte is always consumed, even when it is the
+   NUL terminator; callers need to test for end of string themselves.
+   The returned pointer drops the const qualifier of str. */
+{
+ unsigned char *p = (unsigned char *)str;
+ int c = *p++; /* lead byte, always consumed */
+ int cb = 0; /* number of continuation bytes still expected */
+
+ if ((c & 0x80) == 0) { /* 1 byte char: 0xxxxxxx */
+ *cpoint = c;
+ }
else
- if (cpoint < 0x800) {
- tmp[n++] = 0xc0 | (cpoint >> 6);
- tmp[n++] = 0x80 | (cpoint & 0x3f);
+ if ((c & 0xe0) == 0xc0) { /* 2 byte char: 110xxxxx */
+ *cpoint = (c & 0x1f) << 6;
+ cb = 1;
}
else
- if (cpoint < 0x10000) {
- tmp[n++] = 0xe0 | (cpoint >> 12);
- tmp[n++] = 0x80 | ((cpoint >> 6) & 0x3f);
- tmp[n++] = 0x80 | (cpoint & 0x3f);
+ if ((c & 0xf0) == 0xe0) { /* 3 byte char: 1110xxxx */
+ *cpoint = (c & 0x0f) << 12;
+ cb = 2;
}
else
- if (cpoint < 0x200000) {
- tmp[n++] = 0xf0 | (cpoint >> 18);
- tmp[n++] = 0x80 | ((cpoint >> 12) & 0x3f);
- tmp[n++] = 0x80 | ((cpoint >> 6) & 0x3f);
- tmp[n++] = 0x80 | (cpoint & 0x3f);
+ if ((c & 0xf8) == 0xf0) { /* 4 byte char: 11110xxx */
+ *cpoint = (c & 0x07) << 18;
+ cb = 3;
+ }
+
+ /* process the continuation bytes; cb is decremented by the loop test
+    before the body runs, so inside the body cb * 6 is the bit offset
+    of the 6-bit group being merged */
+ while (cb--) {
+ if ((*p & 0xc0) == 0x80)
+ *cpoint |= (*p++ & 0x3f) << (cb * 6);
+ else {
+ *cpoint = 0xfffd; /* U+FFFD: discard the partial value on a bad byte */
+ break;
+ }
}
- return xs_append_m(str, (char *)tmp, n);
+ return (char *)p;
}
+
#endif /* XS_IMPLEMENTATION */
#endif /* _XS_UNICODE_H */