1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
/* copyright (c) 2022 - 2023 grunfink et al. / MIT license */
#ifndef _XS_REGEX_H
#define _XS_REGEX_H
/* Splits str around matches of the POSIX extended regex rx, processing at
   most count matches. The returned list alternates unmatched text (even
   positions) with the matched separators (odd positions) and ends with one
   extra trailing element. Returns NULL if rx cannot be compiled. */
xs_list *xs_regex_split_n(const char *str, const char *rx, int count);
/* Same as xs_regex_split_n() with count set to XS_ALL (defined elsewhere) */
#define xs_regex_split(str, rx) xs_regex_split_n(str, rx, XS_ALL)
/* Returns a list holding only the matched substrings (the odd positions of
   the xs_regex_split_n() result), processing at most count matches */
xs_list *xs_regex_match_n(const char *str, const char *rx, int count);
/* Same as xs_regex_match_n() with count set to XS_ALL (defined elsewhere) */
#define xs_regex_match(str, rx) xs_regex_match_n(str, rx, XS_ALL)
/* Returns a copy of str where at most count matches of rx are replaced by
   rep; an unescaped & inside rep stands for the matched text.
   NOTE(review): declared as returning xs_list *, but the definition builds
   and returns its result with xs_str_new() / xs_str_cat() -- confirm the
   intended return type. */
xs_list *xs_regex_replace_n(const char *str, const char *rx, const char *rep, int count);
/* Same as xs_regex_replace_n() with count set to XS_ALL (defined elsewhere) */
#define xs_regex_replace(str, rx, rep) xs_regex_replace_n(str, rx, rep, XS_ALL)
#ifdef XS_IMPLEMENTATION
#include <regex.h>
xs_list *xs_regex_split_n(const char *str, const char *rx, int count)
/* Splits str by regex.

   str:   text to split
   rx:    POSIX extended regular expression used as the separator
   count: maximum number of matches to process

   Returns a list that alternates unmatched text (even positions) with the
   matched separators (odd positions); its last element is always the part
   of str that follows the last processed match (the whole of str if no
   match was processed). Returns NULL if rx cannot be compiled. */
{
    regex_t re;
    regmatch_t rm;
    int offset = 0;
    xs_list *list = NULL;

    if (regcomp(&re, rx, REG_EXTENDED))
        return NULL;

    list = xs_list_new();

    /* NOTE(review): if rx matches the empty string at the current position
       (rm.rm_eo == 0), offset does not advance and the loop only stops when
       count is exhausted -- confirm whether callers can pass such patterns */
    while (count > 0) {
        /* start of the still-unprocessed part of str */
        const char *p = str + offset;

        /* past the first match, p is no longer the beginning of the line,
           so ^ must not match there */
        if (regexec(&re, p, 1, &rm, offset > 0 ? REG_NOTBOL : 0))
            break;

        /* add first the leading part of the string; the one-byte insert that
           follows presumably NUL-terminates the raw bytes just appended --
           confirm against xs_insert_m() */
        list = xs_list_append_m(list, p, rm.rm_so);
        list = xs_insert_m(list, xs_size(list) - 1, "", 1);

        /* add now the matched text as the separator */
        list = xs_list_append_m(list, p + rm.rm_so, rm.rm_eo - rm.rm_so);
        list = xs_insert_m(list, xs_size(list) - 1, "", 1);

        /* move forward (rm offsets are relative to p) */
        offset += rm.rm_eo;
        count--;
    }

    /* add the rest of the string; it is computed from offset instead of
       reusing the loop's search pointer, which is stale when the loop ends
       because count ran out and was never set when count <= 0 on entry */
    list = xs_list_append(list, str + offset);

    regfree(&re);

    return list;
}
xs_list *xs_regex_match_n(const char *str, const char *rx, int count)
/* Collects the substrings of str matched by rx, processing at most count
   matches. Works by splitting str with xs_regex_split_n() and keeping only
   the separator entries of the result. */
{
    xs_list *matches = xs_list_new();
    xs *pieces = xs_regex_split_n(str, rx, count);
    xs_list *cursor = pieces;
    xs_val *item;
    int is_separator = 0;

    /* the split result alternates text, separator, text, ...; the flag
       flips on every element so it is set exactly on the separators */
    for (; xs_list_iter(&cursor, &item); is_separator = !is_separator) {
        if (is_separator)
            matches = xs_list_append(matches, item);
    }

    return matches;
}
xs_list *xs_regex_replace_n(const char *str, const char *rx, const char *rep, int count)
/* Builds a copy of str in which the matches of rx (at most count of them)
   are substituted by rep. When rep contains at least one &, it is expanded
   character by character: an unescaped & becomes the matched text and a
   backslash makes the character after it literal. When rep has no &, it is
   appended verbatim. */
{
    xs_str *out = xs_str_new(NULL);
    xs *pieces = xs_regex_split_n(str, rx, count);
    xs_list *cursor = pieces;
    xs_val *item;
    const char *r;
    int idx;
    int has_amp = strchr(rep, '&') != NULL;

    for (idx = 0; xs_list_iter(&cursor, &item); idx++) {
        /* even entries are the text between matches: copy them as they are */
        if ((idx & 0x1) == 0) {
            out = xs_str_cat(out, item);
            continue;
        }

        /* odd entries are matches; without a placeholder rep goes in whole */
        if (!has_amp) {
            out = xs_str_cat(out, rep);
            continue;
        }

        /* rep has a placeholder: expand it one character at a time */
        for (r = rep; *r; r++) {
            if (*r == '&') {
                out = xs_str_cat(out, item);
                continue;
            }

            /* a backslash is dropped and the next character taken literally */
            if (*r == '\\')
                r++;

            /* a trailing backslash has nothing to escape */
            if (*r == '\0')
                break;

            out = xs_append_m(out, r, 1);
        }
    }

    return out;
}
#endif /* XS_IMPLEMENTATION */
#endif /* _XS_REGEX_H */
|