summaryrefslogtreecommitdiff
path: root/xs_unicode.h
blob: 2f081ad61b782c111a32e2723f515500256ef363 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/* copyright (c) 2022 - 2023 grunfink / MIT license */

#ifndef _XS_UNICODE_H

#define _XS_UNICODE_H

 /* encodes the Unicode codepoint cpoint as utf-8 (1 to 4 bytes) and adds
    those bytes to str through xs_append_m(); returns what xs_append_m()
    returns */
 xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
 /* decodes the utf-8 char that starts at str into *cpoint and returns the
    position of the byte that follows it; a sequence with a broken
    continuation byte is reported as 0xfffd */
 char *xs_utf8_dec(const char *str, unsigned int *cpoint);


#ifdef XS_IMPLEMENTATION


char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
/* encodes a Unicode codepoint to utf-8 into buf and returns the new position
   (one past the last byte stored). Between 1 and 4 bytes are written and no
   NUL terminator is added, so buf must have room for 4 bytes. Codepoints
   above 0x10ffff have no utf-8 form and are encoded as 0xfffd (replacement
   character) instead. Surrogate values (0xd800 - 0xdfff) are not filtered:
   they are stored as a plain 3 byte sequence */
{
    unsigned char *p = (unsigned char *)buf;

    /* a 4 byte lead only has 3 payload bits; anything bigger than the
       Unicode range would spill into the marker bits and emit garbage */
    if (cpoint > 0x10ffff)
        cpoint = 0xfffd;

    if (cpoint < 0x80) { /* 1 byte char */
        *p++ = cpoint;
    }
    else
    if (cpoint < 0x800) { /* 2 byte char */
        *p++ = 0xc0 | (cpoint >> 6);
        *p++ = 0x80 | (cpoint & 0x3f);
    }
    else
    if (cpoint < 0x10000) { /* 3 byte char */
        *p++ = 0xe0 | (cpoint >> 12);
        *p++ = 0x80 | ((cpoint >> 6) & 0x3f);
        *p++ = 0x80 | (cpoint & 0x3f);
    }
    else { /* 4 byte char */
        *p++ = 0xf0 | (cpoint >> 18);
        *p++ = 0x80 | ((cpoint >> 12) & 0x3f);
        *p++ = 0x80 | ((cpoint >> 6) & 0x3f);
        *p++ = 0x80 | (cpoint & 0x3f);
    }

    return (char *)p;
}


xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
/* encodes a Unicode codepoint to utf-8 and adds the resulting bytes to str
   through xs_append_m(), returning whatever that returns */
{
    char utf8[4];
    char *end = _xs_utf8_enc(utf8, cpoint);

    /* end - utf8 is the number of bytes the encoder stored (no NUL) */
    return xs_append_m(str, utf8, end - utf8);
}


char *xs_utf8_dec(const char *str, unsigned int *cpoint)
/* decodes an utf-8 char inside str into cpoint and returns the next position.
   *cpoint is always written. A first byte that cannot start a sequence (a
   stray continuation byte or 0xf8 - 0xff) is consumed and reported as 0xfffd.
   A sequence with a missing or invalid continuation byte is also reported as
   0xfffd; the offending byte is not consumed, so decoding can resume on it.
   The first byte is always consumed, even if it is the NUL terminator (it
   decodes as codepoint 0), so callers must check for the end of the string
   themselves. Overlong forms and surrogates are not rejected */
{
    const unsigned char *p = (const unsigned char *)str;
    unsigned int c = *p++;
    unsigned int cp;
    int cb = 0; /* continuation bytes still expected */

    if ((c & 0x80) == 0) { /* 1 byte char */
        cp = c;
    }
    else
    if ((c & 0xe0) == 0xc0) { /* 2 byte char */
        cp = (c & 0x1f) << 6;
        cb = 1;
    }
    else
    if ((c & 0xf0) == 0xe0) { /* 3 byte char */
        cp = (c & 0x0f) << 12;
        cb = 2;
    }
    else
    if ((c & 0xf8) == 0xf0) { /* 4 byte char */
        cp = (c & 0x07) << 18;
        cb = 3;
    }
    else {
        /* not a valid lead byte: never leave the output unset */
        cp = 0xfffd;
    }

    /* process the continuation bytes */
    while (cb--) {
        if ((*p & 0xc0) == 0x80) {
            /* cb already counts the bytes after this one, so it is also
               the position of this 6 bit group */
            cp |= (unsigned int)(*p++ & 0x3f) << (cb * 6);
        }
        else {
            cp = 0xfffd;
            break;
        }
    }

    *cpoint = cp;

    return (char *)p;
}


#endif /* XS_IMPLEMENTATION */

#endif /* _XS_UNICODE_H */