summaryrefslogtreecommitdiff
path: root/xs_unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'xs_unicode.h')
-rw-r--r--xs_unicode.h116
1 files changed, 114 insertions, 2 deletions
diff --git a/xs_unicode.h b/xs_unicode.h
index 2e9a754..a5a1dcb 100644
--- a/xs_unicode.h
+++ b/xs_unicode.h
@@ -9,6 +9,7 @@
unsigned int xs_utf8_dec(const char **str);
int xs_unicode_width(unsigned int cpoint);
int xs_is_surrogate(unsigned int cpoint);
+ int xs_is_diacritic(unsigned int cpoint);
unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2);
unsigned int xs_surrogate_enc(unsigned int cpoint);
unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
@@ -22,7 +23,12 @@
int xs_unicode_is_alpha(unsigned int cpoint);
#ifdef _XS_H
+ xs_str *xs_utf8_insert(xs_str *str, unsigned int cpoint, int *offset);
xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint);
+ xs_str *xs_utf8_to_upper(const char *str);
+ xs_str *xs_utf8_to_lower(const char *str);
+ xs_str *xs_utf8_to_nfd(const char *str);
+ xs_str *xs_utf8_to_nfc(const char *str);
#endif
#ifdef XS_IMPLEMENTATION
@@ -144,6 +150,12 @@ int xs_unicode_width(unsigned int cpoint)
}
+int xs_is_diacritic(unsigned int cpoint)
+{
+ return cpoint >= 0x300 && cpoint <= 0x36f;
+}
+
+
/** surrogate pairs **/
int xs_is_surrogate(unsigned int cpoint)
@@ -172,14 +184,27 @@ unsigned int xs_surrogate_enc(unsigned int cpoint)
#ifdef _XS_H
-xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint)
+xs_str *xs_utf8_insert(xs_str *str, unsigned int cpoint, int *offset)
/* encodes an Unicode codepoint to utf-8 into str */
{
char tmp[4];
int c = xs_utf8_enc(tmp, cpoint);
- return xs_append_m(str, tmp, c);
+ str = xs_insert_m(str, *offset, tmp, c);
+
+ *offset += c;
+
+ return str;
+}
+
+
+xs_str *xs_utf8_cat(xs_str *str, unsigned int cpoint)
+/* encodes an Unicode codepoint to utf-8 into str */
+{
+ int offset = strlen(str);
+
+ return xs_utf8_insert(str, cpoint, &offset);
}
#endif /* _XS_H */
@@ -232,6 +257,9 @@ unsigned int *_xs_unicode_lower_search(unsigned int cpoint)
unsigned int xs_unicode_to_lower(unsigned int cpoint)
/* returns the cpoint to lowercase */
{
+ if (cpoint < 0x80)
+ return tolower(cpoint);
+
unsigned int *p = _xs_unicode_upper_search(cpoint);
return p == NULL ? cpoint : p[1];
@@ -241,6 +269,9 @@ unsigned int xs_unicode_to_lower(unsigned int cpoint)
unsigned int xs_unicode_to_upper(unsigned int cpoint)
/* returns the cpoint to uppercase */
{
+ if (cpoint < 0x80)
+ return toupper(cpoint);
+
unsigned int *p = _xs_unicode_lower_search(cpoint);
return p == NULL ? cpoint : p[0];
@@ -317,6 +348,87 @@ int xs_unicode_is_alpha(unsigned int cpoint)
}
+#ifdef _XS_H
+
+xs_str *xs_utf8_to_upper(const char *str)
+{
+ xs_str *s = xs_str_new(NULL);
+ unsigned int cpoint;
+ int offset = 0;
+
+ while ((cpoint = xs_utf8_dec(&str))) {
+ cpoint = xs_unicode_to_upper(cpoint);
+ s = xs_utf8_insert(s, cpoint, &offset);
+ }
+
+ return s;
+}
+
+
+xs_str *xs_utf8_to_lower(const char *str)
+{
+ xs_str *s = xs_str_new(NULL);
+ unsigned int cpoint;
+ int offset = 0;
+
+ while ((cpoint = xs_utf8_dec(&str))) {
+ cpoint = xs_unicode_to_lower(cpoint);
+ s = xs_utf8_insert(s, cpoint, &offset);
+ }
+
+ return s;
+}
+
+
+xs_str *xs_utf8_to_nfd(const char *str)
+{
+ xs_str *s = xs_str_new(NULL);
+ unsigned int cpoint;
+ int offset = 0;
+
+ while ((cpoint = xs_utf8_dec(&str))) {
+ unsigned int base;
+ unsigned int diac;
+
+ if (xs_unicode_nfd(cpoint, &base, &diac)) {
+ s = xs_utf8_insert(s, base, &offset);
+ s = xs_utf8_insert(s, diac, &offset);
+ }
+ else
+ s = xs_utf8_insert(s, cpoint, &offset);
+ }
+
+ return s;
+}
+
+
+xs_str *xs_utf8_to_nfc(const char *str)
+{
+ xs_str *s = xs_str_new(NULL);
+ unsigned int cpoint;
+ unsigned int base = 0;
+ int offset = 0;
+
+ while ((cpoint = xs_utf8_dec(&str))) {
+ if (xs_is_diacritic(cpoint)) {
+ if (xs_unicode_nfc(base, cpoint, &base))
+ continue;
+ }
+
+ if (base)
+ s = xs_utf8_insert(s, base, &offset);
+
+ base = cpoint;
+ }
+
+ if (base)
+ s = xs_utf8_insert(s, base, &offset);
+
+ return s;
+}
+
+#endif /* _XS_H */
+
#endif /* _XS_UNICODE_TBL_H */
#endif /* XS_IMPLEMENTATION */