/*
 * Recovered from a cgit patch view that had been collapsed onto one line:
 *
 *   commit   acf3cdcf80da7c3443e202a02d4b626c13e9e8dd
 *   From:    default
 *   Date:    Fri, 17 Nov 2023 03:51:04 +0100
 *   Subject: Backport from xs.
 *   File:    xs_unicode.h (index c7d6190..f5880f0)
 *            1 file changed, 29 insertions(+)
 *
 * Hunk 1 (@@ -8,6 +8,9 @@) adds the three prototypes below. In the
 * header they sit after the existing declarations of xs_utf8_enc(),
 * xs_utf8_dec() and xs_unicode_width(), and before the declarations of
 * _xs_unicode_upper_search(), _xs_unicode_lower_search() and the
 * xs_unicode_is_upper() macro.
 *
 * Hunk 2 (@@ -138,6 +141,32 @@) adds the three definitions below. In the
 * header they sit after the body of xs_unicode_width() and before the
 * "#ifdef _XS_UNICODE_TBL_H" section (the one that asks for
 * xs_unicode_tbl.h to be included before this header).
 */

int xs_is_surrogate(unsigned int cpoint);
unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2);
unsigned int xs_surrogate_enc(unsigned int cpoint);


/** surrogate pairs **/

int xs_is_surrogate(unsigned int cpoint)
/* checks if cpoint lies in the UTF-16 surrogate range 0xd800..0xdfff.
   The range test covers both halves of a pair (high, 0xd800..0xdbff,
   and low, 0xdc00..0xdfff), not only the first one.
   Returns 1 if it does, 0 otherwise */
{
    return cpoint >= 0xd800 && cpoint <= 0xdfff;
}


unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2)
/* "decodes" a surrogate pair into a codepoint.
   p1 is the high (first) surrogate and p2 the low (second) one; only
   the low 10 bits of each are used, so their top bits are not validated.
   Returns a codepoint in the range 0x10000..0x10ffff */
{
    /* the two 10 bit payloads form a 20 bit offset from 0x10000 */
    unsigned int offset = ((p1 & 0x3ff) << 10) | (p2 & 0x3ff);

    /* the base must be ADDED, not OR'ed: for codepoints >= 0x20000 the
       offset already has bit 16 set and an OR would silently drop the
       base (e.g. d840 dc00 would decode as 0x10000 instead of 0x20000) */
    return 0x10000 + offset;
}


unsigned int xs_surrogate_enc(unsigned int cpoint)
/* "encodes" a Unicode into a surrogate pair (p1 in the MSB word).
   cpoint is expected to be in the range 0x10000..0x10ffff; this is
   not checked, and other values do not produce a valid pair.
   Returns (p1 << 16) | p2 */
{
    /* 0xd7c0 is 0xd800 - (0x10000 >> 10): subtracting the 0x10000 base
       and adding the high surrogate base in one single step */
    unsigned int p1 = 0xd7c0 + (cpoint >> 10);
    unsigned int p2 = 0xdc00 + (cpoint & 0x3ff);

    return (p1 << 16) | p2;
}