summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- xs_unicode.h 87
-rw-r--r-- xs_version.h 2
2 files changed, 69 insertions, 20 deletions
diff --git a/xs_unicode.h b/xs_unicode.h
index 6f78d58..2f081ad 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -5,42 +5,91 @@
#define _XS_UNICODE_H
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint); /* encodes a Unicode codepoint to utf-8 into str */
+ char *xs_utf8_dec(const char *str, unsigned int *cpoint); /* decodes one utf-8 char from str into *cpoint; returns the next position */
#ifdef XS_IMPLEMENTATION
-/** utf-8 **/
+
+char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
+/* Encodes a Unicode codepoint to utf-8 into buf and returns the new position
+ * (a pointer just past the last byte written). Between 1 and 4 bytes are
+ * stored and no NUL terminator is added.
+ *
+ * The nesting is deliberate: each wider encoding writes its own lead byte
+ * and then falls out into the enclosing block, so the trailing continuation
+ * bytes are emitted by statements shared between the encoding lengths.
+ *
+ * NOTE(review): there is no upper bound check on cpoint. Values >= 0x200000
+ * still take the 4 byte path, where the lead byte no longer has the
+ * 11110xxx form, and surrogates (0xd800-0xdfff) are encoded as-is. Confirm
+ * that callers only pass valid codepoints. */
+{
+ unsigned char *p = (unsigned char *)buf;
+
+ if (cpoint < 0x80) /* 1 byte char */
+ *p++ = cpoint & 0xff;
+ else {
+ if (cpoint < 0x800) /* 2 byte char */
+ *p++ = 0xc0 | (cpoint >> 6);
+ else {
+ if (cpoint < 0x10000) /* 3 byte char */
+ *p++ = 0xe0 | (cpoint >> 12);
+ else { /* 4 byte char */
+ *p++ = 0xf0 | (cpoint >> 18);
+ *p++ = 0x80 | ((cpoint >> 12) & 0x3f);
+ }
+
+ *p++ = 0x80 | ((cpoint >> 6) & 0x3f); /* shared by 3 and 4 byte chars */
+ }
+
+ *p++ = 0x80 | (cpoint & 0x3f); /* last byte of every multibyte char */
+ }
+
+ return (char *)p;
+}
+
xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
-/* encodes an Unicode codepoint to utf8 */
+/* Encodes a Unicode codepoint to utf-8 into str. The bytes are built in a
+ * local 4 byte buffer by _xs_utf8_enc() and then handed to xs_append_m()
+ * together with their count; whatever xs_append_m() returns is returned. */
{
- unsigned char tmp[4];
- int n = 0;
+ char tmp[4], *p;
+
+ p = _xs_utf8_enc(tmp, cpoint);
- if (cpoint < 0x80)
- tmp[n++] = cpoint & 0xff;
+ return xs_append_m(str, tmp, p - tmp); /* p - tmp is the number of bytes encoded */
+}
+
+
+char *xs_utf8_dec(const char *str, unsigned int *cpoint)
+/* Decodes one utf-8 char inside str into cpoint and returns the next position.
+ *
+ * The lead byte selects the sequence length (1 to 4 bytes) and supplies the
+ * top bits; each continuation byte (10xxxxxx) then adds 6 more bits.
+ *
+ * Malformed input yields U+FFFD in *cpoint:
+ * - a lead byte that matches none of the four patterns (a stray
+ *   continuation byte, or 0xf8 and above) consumes just that byte;
+ * - a missing continuation byte stops the scan at the offending byte,
+ *   which is left unconsumed so the caller can resume from it. Because a
+ *   NUL is not a continuation byte, a truncated sequence never makes the
+ *   scan run past the string terminator.
+ *
+ * The first byte is always consumed, so calling this on an empty string
+ * returns a pointer past the terminator; callers must test for the end of
+ * the string themselves. Overlong forms, surrogates and values above
+ * 0x10ffff are not rejected. */
+{
+ unsigned char *p = (unsigned char *)str;
+ int c = *p++;
+ int cb = 0; /* continuation bytes still expected */
+
+ if ((c & 0x80) == 0) { /* 1 byte char */
+ *cpoint = c;
+ }
else
- if (cpoint < 0x800) {
- tmp[n++] = 0xc0 | (cpoint >> 6);
- tmp[n++] = 0x80 | (cpoint & 0x3f);
+ if ((c & 0xe0) == 0xc0) { /* 2 byte char */
+ *cpoint = (c & 0x1f) << 6;
+ cb = 1;
}
else
- if (cpoint < 0x10000) {
- tmp[n++] = 0xe0 | (cpoint >> 12);
- tmp[n++] = 0x80 | ((cpoint >> 6) & 0x3f);
- tmp[n++] = 0x80 | (cpoint & 0x3f);
+ if ((c & 0xf0) == 0xe0) { /* 3 byte char */
+ *cpoint = (c & 0x0f) << 12;
+ cb = 2;
}
else
- if (cpoint < 0x200000) {
- tmp[n++] = 0xf0 | (cpoint >> 18);
- tmp[n++] = 0x80 | ((cpoint >> 12) & 0x3f);
- tmp[n++] = 0x80 | ((cpoint >> 6) & 0x3f);
- tmp[n++] = 0x80 | (cpoint & 0x3f);
+ if ((c & 0xf8) == 0xf0) { /* 4 byte char */
+ *cpoint = (c & 0x07) << 18;
+ cb = 3;
+ }
+ else /* stray continuation byte or invalid lead byte: never leave *cpoint unset */
+ *cpoint = 0xfffd;
+
+ /* process the continuation bytes */
+ while (cb--) {
+ if ((*p & 0xc0) == 0x80)
+ *cpoint |= (*p++ & 0x3f) << (cb * 6); /* cb was already decremented: it is now the number of bytes still to come */
+ else {
+ *cpoint = 0xfffd;
+ break;
+ }
}
- return xs_append_m(str, (char *)tmp, n);
+ return (char *)p;
}
+
#endif /* XS_IMPLEMENTATION */
#endif /* _XS_UNICODE_H */
diff --git a/xs_version.h b/xs_version.h
index 099bc71..7a793d1 100644
--- a/xs_version.h
+++ b/xs_version.h
@@ -1 +1 @@
-/* 1948fa3c5f0df994170cd38b9144b99734b071e6 */
+/* 3588cbb7859917f1c5965254f8a53c3349c773ea */