BadVPN – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 /**
2 * @file regex_match.c
3 * @author Ambroz Bizjak <ambrop7@gmail.com>
4 *
5 * @section LICENSE
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the author nor the
15 * names of its contributors may be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * @section DESCRIPTION
30 *
31 * Regular expression matching module.
32 *
33 * Synopsis:
34 * regex_match(string input, string regex)
35 *
36 * Variables:
37 * succeeded - "true" or "false", indicating whether input matched regex
38 * matchN - for N=0,1,2,..., the matching data for the N-th subexpression
39 * (match0 = whole match)
40 *
41 * Description:
42 * Matches 'input' with the POSIX extended regular expression 'regex'.
43 * 'regex' must be a string without null bytes, but 'input' can contain null bytes.
44 * However, it's difficult, if not impossible, to actually match nulls with the regular
45 * expression.
46 * The input and regex strings are interpreted according to the POSIX regex functions
47 * (regcomp(), regexec()); in particular, the current locale setting affects the
48 * interpretation.
49 *
50 * Synopsis:
51 * regex_replace(string input, list(string) regex, list(string) replace)
52 *
53 * Variables:
54 * string (empty) - transformed input
55 *
56 * Description:
57 * Replaces matching parts of a string. Replacement is performed by repeatedly matching
58 * the remaining part of the string with all regular expressions. On each step, out of
59 * all regular expressions that match the remainder of the string, the one whose match
60 * starts at the least position wins, and the matching part is replaced with the
61 * replacement string corresponding to this regular expression. The process continues
62 * from the end of the just-replaced portion until no more regular expressions match.
63 * If multiple regular expressions match at the least position, the one that appears
64 * first in the 'regex' argument wins.
65 */
66  
67 #include <stdlib.h>
68 #include <string.h>
69 #include <limits.h>
70 #include <regex.h>
71  
72 #include <misc/string_begins_with.h>
73 #include <misc/parse_number.h>
74 #include <misc/expstring.h>
75 #include <misc/debug.h>
76 #include <misc/balloc.h>
77  
78 #include <ncd/module_common.h>
79  
80 #include <generated/blog_channel_ncd_regex_match.h>
81  
82 #define MAX_MATCHES 64
83  
84 struct instance {
85 NCDModuleInst *i;
86 MemRef input;
87 int succeeded;
88 int num_matches;
89 regmatch_t matches[MAX_MATCHES];
90 };
91  
92 struct replace_instance {
93 NCDModuleInst *i;
94 MemRef output;
95 };
96  
97 static void func_new (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params)
98 {
99 struct instance *o = vo;
100 o->i = i;
101  
102 // read arguments
103 NCDValRef input_arg;
104 NCDValRef regex_arg;
105 if (!NCDVal_ListRead(params->args, 2, &input_arg, &regex_arg)) {
106 ModuleLog(o->i, BLOG_ERROR, "wrong arity");
107 goto fail0;
108 }
109 if (!NCDVal_IsString(input_arg) || !NCDVal_IsStringNoNulls(regex_arg)) {
110 ModuleLog(o->i, BLOG_ERROR, "wrong type");
111 goto fail0;
112 }
113 o->input = NCDVal_StringMemRef(input_arg);
114  
115 // make sure we don't overflow regoff_t
116 if (o->input.len > INT_MAX) {
117 ModuleLog(o->i, BLOG_ERROR, "input string too long");
118 goto fail0;
119 }
120  
121 // null terminate regex
122 NCDValNullTermString regex_nts;
123 if (!NCDVal_StringNullTerminate(regex_arg, &regex_nts)) {
124 ModuleLog(i, BLOG_ERROR, "NCDVal_StringNullTerminate failed");
125 goto fail0;
126 }
127  
128 // compile regex
129 regex_t preg;
130 int ret = regcomp(&preg, regex_nts.data, REG_EXTENDED);
131 NCDValNullTermString_Free(&regex_nts);
132 if (ret != 0) {
133 ModuleLog(o->i, BLOG_ERROR, "regcomp failed (error=%d)", ret);
134 goto fail0;
135 }
136  
137 // execute match
138 o->matches[0].rm_so = 0;
139 o->matches[0].rm_eo = o->input.len;
140 o->succeeded = (regexec(&preg, o->input.ptr, MAX_MATCHES, o->matches, REG_STARTEND) == 0);
141  
142 // free regex
143 regfree(&preg);
144  
145 // signal up
146 NCDModuleInst_Backend_Up(o->i);
147 return;
148  
149 fail0:
150 NCDModuleInst_Backend_DeadError(i);
151 }
152  
153 static int func_getvar (void *vo, const char *name, NCDValMem *mem, NCDValRef *out)
154 {
155 struct instance *o = vo;
156  
157 if (!strcmp(name, "succeeded")) {
158 *out = ncd_make_boolean(mem, o->succeeded);
159 return 1;
160 }
161  
162 size_t pos;
163 uintmax_t n;
164 if ((pos = string_begins_with(name, "match")) && parse_unsigned_integer(MemRef_MakeCstr(name + pos), &n)) {
165 if (o->succeeded && n < MAX_MATCHES && o->matches[n].rm_so >= 0) {
166 regmatch_t *m = &o->matches[n];
167  
168 ASSERT(m->rm_so <= o->input.len)
169 ASSERT(m->rm_eo >= m->rm_so)
170 ASSERT(m->rm_eo <= o->input.len)
171  
172 size_t len = m->rm_eo - m->rm_so;
173  
174 *out = NCDVal_NewStringBinMr(mem, MemRef_Sub(o->input, m->rm_so, len));
175 return 1;
176 }
177 }
178  
179 return 0;
180 }
181  
182 static void replace_func_new (void *vo, NCDModuleInst *i, const struct NCDModuleInst_new_params *params)
183 {
184 struct replace_instance *o = vo;
185 o->i = i;
186  
187 // read arguments
188 NCDValRef input_arg;
189 NCDValRef regex_arg;
190 NCDValRef replace_arg;
191 if (!NCDVal_ListRead(params->args, 3, &input_arg, &regex_arg, &replace_arg)) {
192 ModuleLog(i, BLOG_ERROR, "wrong arity");
193 goto fail1;
194 }
195 if (!NCDVal_IsString(input_arg) || !NCDVal_IsList(regex_arg) || !NCDVal_IsList(replace_arg)) {
196 ModuleLog(i, BLOG_ERROR, "wrong type");
197 goto fail1;
198 }
199  
200 // check number of regex/replace
201 if (NCDVal_ListCount(regex_arg) != NCDVal_ListCount(replace_arg)) {
202 ModuleLog(i, BLOG_ERROR, "number of regex's is not the same as number of replacements");
203 goto fail1;
204 }
205 size_t num_regex = NCDVal_ListCount(regex_arg);
206  
207 // allocate array for compiled regex's
208 regex_t *regs = BAllocArray(num_regex, sizeof(regs[0]));
209 if (!regs) {
210 ModuleLog(i, BLOG_ERROR, "BAllocArray failed");
211 goto fail1;
212 }
213 size_t num_done_regex = 0;
214  
215 // compile regex's, check arguments
216 while (num_done_regex < num_regex) {
217 NCDValRef regex = NCDVal_ListGet(regex_arg, num_done_regex);
218 NCDValRef replace = NCDVal_ListGet(replace_arg, num_done_regex);
219  
220 if (!NCDVal_IsStringNoNulls(regex) || !NCDVal_IsString(replace)) {
221 ModuleLog(i, BLOG_ERROR, "wrong regex/replace type for pair %zu", num_done_regex);
222 goto fail2;
223 }
224  
225 // null terminate regex
226 NCDValNullTermString regex_nts;
227 if (!NCDVal_StringNullTerminate(regex, &regex_nts)) {
228 ModuleLog(i, BLOG_ERROR, "NCDVal_StringNullTerminate failed");
229 goto fail2;
230 }
231  
232 int res = regcomp(&regs[num_done_regex], regex_nts.data, REG_EXTENDED);
233 NCDValNullTermString_Free(&regex_nts);
234 if (res != 0) {
235 ModuleLog(i, BLOG_ERROR, "regcomp failed for pair %zu (error=%d)", num_done_regex, res);
236 goto fail2;
237 }
238  
239 num_done_regex++;
240 }
241  
242 // init output string
243 ExpString out;
244 if (!ExpString_Init(&out)) {
245 ModuleLog(i, BLOG_ERROR, "ExpString_Init failed");
246 goto fail2;
247 }
248  
249 // input state
250 MemRef in = NCDVal_StringMemRef(input_arg);
251 size_t in_pos = 0;
252  
253 // process input
254 while (in_pos < in.len) {
255 // find first match
256 int have_match = 0;
257 size_t match_regex = 0; // to remove warning
258 regmatch_t match = {0, 0}; // to remove warning
259 for (size_t j = 0; j < num_regex; j++) {
260 regmatch_t this_match;
261 this_match.rm_so = 0;
262 this_match.rm_eo = in.len - in_pos;
263 if (regexec(&regs[j], in.ptr + in_pos, 1, &this_match, REG_STARTEND) == 0 && (!have_match || this_match.rm_so < match.rm_so)) {
264 have_match = 1;
265 match_regex = j;
266 match = this_match;
267 }
268 }
269  
270 // if no match, append remaining data and finish
271 if (!have_match) {
272 if (!ExpString_AppendBinaryMr(&out, MemRef_SubFrom(in, in_pos))) {
273 ModuleLog(i, BLOG_ERROR, "ExpString_AppendBinaryMr failed");
274 goto fail3;
275 }
276 break;
277 }
278  
279 // append data before match
280 if (!ExpString_AppendBinaryMr(&out, MemRef_Sub(in, in_pos, match.rm_so))) {
281 ModuleLog(i, BLOG_ERROR, "ExpString_AppendBinaryMr failed");
282 goto fail3;
283 }
284  
285 // append replacement data
286 NCDValRef replace = NCDVal_ListGet(replace_arg, match_regex);
287 if (!ExpString_AppendBinaryMr(&out, NCDVal_StringMemRef(replace))) {
288 ModuleLog(i, BLOG_ERROR, "ExpString_AppendBinaryMr failed");
289 goto fail3;
290 }
291  
292 in_pos += match.rm_eo;
293 }
294  
295 // set output
296 o->output = ExpString_GetMr(&out);
297  
298 // free compiled regex's
299 while (num_done_regex-- > 0) {
300 regfree(&regs[num_done_regex]);
301 }
302  
303 // free array
304 BFree(regs);
305  
306 // signal up
307 NCDModuleInst_Backend_Up(i);
308 return;
309  
310 fail3:
311 ExpString_Free(&out);
312 fail2:
313 while (num_done_regex-- > 0) {
314 regfree(&regs[num_done_regex]);
315 }
316 BFree(regs);
317 fail1:
318 NCDModuleInst_Backend_DeadError(i);
319 }
320  
321 static void replace_func_die (void *vo)
322 {
323 struct replace_instance *o = vo;
324  
325 // free output
326 BFree((char *)o->output.ptr);
327  
328 NCDModuleInst_Backend_Dead(o->i);
329 }
330  
331 static int replace_func_getvar (void *vo, const char *name, NCDValMem *mem, NCDValRef *out)
332 {
333 struct replace_instance *o = vo;
334  
335 if (!strcmp(name, "")) {
336 *out = NCDVal_NewStringBinMr(mem, o->output);
337 return 1;
338 }
339  
340 return 0;
341 }
342  
343 static struct NCDModule modules[] = {
344 {
345 .type = "regex_match",
346 .func_new2 = func_new,
347 .func_getvar = func_getvar,
348 .alloc_size = sizeof(struct instance)
349 }, {
350 .type = "regex_replace",
351 .func_new2 = replace_func_new,
352 .func_die = replace_func_die,
353 .func_getvar = replace_func_getvar,
354 .alloc_size = sizeof(struct replace_instance)
355 }, {
356 .type = NULL
357 }
358 };
359  
360 const struct NCDModuleGroup ncdmodule_regex_match = {
361 .modules = modules
362 };