BadVPN – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | /** |
2 | * @file NCDConfigTokenizer.c |
||
3 | * @author Ambroz Bizjak <ambrop7@gmail.com> |
||
4 | * |
||
5 | * @section LICENSE |
||
6 | * |
||
7 | * Redistribution and use in source and binary forms, with or without |
||
8 | * modification, are permitted provided that the following conditions are met: |
||
9 | * 1. Redistributions of source code must retain the above copyright |
||
10 | * notice, this list of conditions and the following disclaimer. |
||
11 | * 2. Redistributions in binary form must reproduce the above copyright |
||
12 | * notice, this list of conditions and the following disclaimer in the |
||
13 | * documentation and/or other materials provided with the distribution. |
||
14 | * 3. Neither the name of the author nor the |
||
15 | * names of its contributors may be used to endorse or promote products |
||
16 | * derived from this software without specific prior written permission. |
||
17 | * |
||
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
||
19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
||
20 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
||
21 | * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
||
22 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
||
23 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
||
24 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
||
25 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
||
26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
||
27 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
||
28 | */ |
||
29 | |||
30 | #include <string.h> |
||
31 | #include <stddef.h> |
||
32 | #include <stdlib.h> |
||
33 | |||
34 | #include <misc/debug.h> |
||
35 | #include <misc/string_begins_with.h> |
||
36 | #include <misc/balloc.h> |
||
37 | #include <misc/expstring.h> |
||
38 | #include <misc/parse_number.h> |
||
39 | #include <base/BLog.h> |
||
40 | |||
41 | #include <ncd/NCDConfigTokenizer.h> |
||
42 | |||
43 | #include <generated/blog_channel_NCDConfigTokenizer.h> |
||
44 | |||
/**
 * Tells whether a character may appear inside a name token.
 *
 * @param c character to classify
 * @return 1 if c is an underscore or lies in one of the ranges '0'-'9',
 *         'a'-'z' or 'A'-'Z'; 0 otherwise
 */
static int is_name_char (char c)
{
    if (c == '_') {
        return 1;
    }
    if (c >= '0' && c <= '9') {
        return 1;
    }
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    return (c >= 'A' && c <= 'Z');
}
||
49 | |||
/**
 * Tells whether a character may start a name token.
 * Unlike is_name_char, digits are not accepted here.
 *
 * @param c character to classify
 * @return 1 if c is an underscore or lies in one of the ranges 'a'-'z'
 *         or 'A'-'Z'; 0 otherwise
 */
static int is_name_first_char (char c)
{
    int is_lower = (c >= 'a' && c <= 'z');
    int is_upper = (c >= 'A' && c <= 'Z');

    return (is_lower || is_upper || c == '_');
}
||
54 | |||
/**
 * Tells whether a character is whitespace that the tokenizer skips.
 *
 * @param c character to classify
 * @return 1 for space, horizontal tab, line feed and carriage return;
 *         0 for anything else
 */
static int is_space_char (char c)
{
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return 1;

        default:
            return 0;
    }
}
||
59 | |||
/**
 * Compares a length-delimited byte sequence with a null-terminated string.
 *
 * @param str start of the data to compare; need not be null-terminated
 * @param str_len number of bytes available at str
 * @param needle null-terminated string to compare against
 * @return 1 if str_len equals the length of needle and the bytes match,
 *         0 otherwise (including when str_len is negative)
 *
 * NOTE(review): no caller is visible in this part of the file.
 */
static int string_equals (const char *str, int str_len, const char *needle)
{
    // Reject negative lengths explicitly instead of relying on the implicit
    // int -> size_t conversion in the comparison below.
    if (str_len < 0) {
        return 0;
    }

    size_t len = (size_t)str_len;

    return (len == strlen(needle) && memcmp(str, needle, len) == 0);
}
||
64 | |||
65 | void NCDConfigTokenizer_Tokenize (MemRef the_str, NCDConfigTokenizer_output output, void *user) |
||
66 | { |
||
67 | char const *str = the_str.ptr; |
||
68 | size_t left = the_str.len; |
||
69 | |||
70 | size_t line = 1; |
||
71 | size_t line_char = 1; |
||
72 | |||
73 | while (left > 0) { |
||
74 | size_t l; |
||
75 | int error = 0; |
||
76 | int token; |
||
77 | void *token_val = NULL; |
||
78 | size_t token_len = 0; |
||
79 | |||
80 | if (*str == '#') { |
||
81 | l = 1; |
||
82 | while (l < left && str[l] != '\n') { |
||
83 | l++; |
||
84 | } |
||
85 | token = 0; |
||
86 | } |
||
87 | else if (l = data_begins_with(str, left, "{")) { |
||
88 | token = NCD_TOKEN_CURLY_OPEN; |
||
89 | } |
||
90 | else if (l = data_begins_with(str, left, "}")) { |
||
91 | token = NCD_TOKEN_CURLY_CLOSE; |
||
92 | } |
||
93 | else if (l = data_begins_with(str, left, "(")) { |
||
94 | token = NCD_TOKEN_ROUND_OPEN; |
||
95 | } |
||
96 | else if (l = data_begins_with(str, left, ")")) { |
||
97 | token = NCD_TOKEN_ROUND_CLOSE; |
||
98 | } |
||
99 | else if (l = data_begins_with(str, left, ";")) { |
||
100 | token = NCD_TOKEN_SEMICOLON; |
||
101 | } |
||
102 | else if (l = data_begins_with(str, left, ".")) { |
||
103 | token = NCD_TOKEN_DOT; |
||
104 | } |
||
105 | else if (l = data_begins_with(str, left, ",")) { |
||
106 | token = NCD_TOKEN_COMMA; |
||
107 | } |
||
108 | else if (l = data_begins_with(str, left, ":")) { |
||
109 | token = NCD_TOKEN_COLON; |
||
110 | } |
||
111 | else if (l = data_begins_with(str, left, "[")) { |
||
112 | token = NCD_TOKEN_BRACKET_OPEN; |
||
113 | } |
||
114 | else if (l = data_begins_with(str, left, "]")) { |
||
115 | token = NCD_TOKEN_BRACKET_CLOSE; |
||
116 | } |
||
117 | else if (l = data_begins_with(str, left, "@")) { |
||
118 | token = NCD_TOKEN_AT; |
||
119 | } |
||
120 | else if (l = data_begins_with(str, left, "^")) { |
||
121 | token = NCD_TOKEN_CARET; |
||
122 | } |
||
123 | else if (l = data_begins_with(str, left, "->")) { |
||
124 | token = NCD_TOKEN_ARROW; |
||
125 | } |
||
126 | else if (l = data_begins_with(str, left, "If")) { |
||
127 | token = NCD_TOKEN_IF; |
||
128 | } |
||
129 | else if (l = data_begins_with(str, left, "Elif")) { |
||
130 | token = NCD_TOKEN_ELIF; |
||
131 | } |
||
132 | else if (l = data_begins_with(str, left, "elif")) { |
||
133 | token = NCD_TOKEN_ELIF; |
||
134 | } |
||
135 | else if (l = data_begins_with(str, left, "Else")) { |
||
136 | token = NCD_TOKEN_ELSE; |
||
137 | } |
||
138 | else if (l = data_begins_with(str, left, "else")) { |
||
139 | token = NCD_TOKEN_ELSE; |
||
140 | } |
||
141 | else if (l = data_begins_with(str, left, "Foreach")) { |
||
142 | token = NCD_TOKEN_FOREACH; |
||
143 | } |
||
144 | else if (l = data_begins_with(str, left, "As")) { |
||
145 | token = NCD_TOKEN_AS; |
||
146 | } |
||
147 | else if (l = data_begins_with(str, left, "Block")) { |
||
148 | token = NCD_TOKEN_BLOCK; |
||
149 | } |
||
150 | else if (l = data_begins_with(str, left, "Do")) { |
||
151 | token = NCD_TOKEN_DO; |
||
152 | } |
||
153 | else if (l = data_begins_with(str, left, "Interrupt")) { |
||
154 | token = NCD_TOKEN_INTERRUPT; |
||
155 | } |
||
156 | else if (l = data_begins_with(str, left, "include_guard")) { |
||
157 | token = NCD_TOKEN_INCLUDE_GUARD; |
||
158 | } |
||
159 | else if (l = data_begins_with(str, left, "include")) { |
||
160 | token = NCD_TOKEN_INCLUDE; |
||
161 | } |
||
162 | else if (is_name_first_char(*str)) { |
||
163 | l = 1; |
||
164 | while (l < left && is_name_char(str[l])) { |
||
165 | l++; |
||
166 | } |
||
167 | |||
168 | // allocate buffer |
||
169 | bsize_t bufsize = bsize_add(bsize_fromsize(l), bsize_fromint(1)); |
||
170 | char *buf; |
||
171 | if (bufsize.is_overflow || !(buf = malloc(bufsize.value))) { |
||
172 | BLog(BLOG_ERROR, "malloc failed"); |
||
173 | error = 1; |
||
174 | goto out; |
||
175 | } |
||
176 | |||
177 | // copy and terminate |
||
178 | memcpy(buf, str, l); |
||
179 | buf[l] = '\0'; |
||
180 | |||
181 | if (!strcmp(buf, "process")) { |
||
182 | token = NCD_TOKEN_PROCESS; |
||
183 | free(buf); |
||
184 | } |
||
185 | else if (!strcmp(buf, "template")) { |
||
186 | token = NCD_TOKEN_TEMPLATE; |
||
187 | free(buf); |
||
188 | } |
||
189 | else { |
||
190 | token = NCD_TOKEN_NAME; |
||
191 | token_val = buf; |
||
192 | token_len = l; |
||
193 | } |
||
194 | } |
||
195 | else if (*str == '"') do { |
||
196 | // init string |
||
197 | ExpString estr; |
||
198 | if (!ExpString_Init(&estr)) { |
||
199 | BLog(BLOG_ERROR, "ExpString_Init failed"); |
||
200 | goto string_fail0; |
||
201 | } |
||
202 | |||
203 | // skip start quote |
||
204 | l = 1; |
||
205 | |||
206 | // decode string |
||
207 | while (l < left) { |
||
208 | uint8_t dec_ch; |
||
209 | |||
210 | // get character |
||
211 | if (str[l] == '\\') { |
||
212 | if (left - l < 2) { |
||
213 | BLog(BLOG_ERROR, "escape character found in string but nothing follows"); |
||
214 | goto string_fail1; |
||
215 | } |
||
216 | |||
217 | size_t extra = 0; |
||
218 | |||
219 | switch (str[l + 1]) { |
||
220 | case '\'': |
||
221 | case '\"': |
||
222 | case '\\': |
||
223 | case '\?': |
||
224 | dec_ch = str[l + 1]; break; |
||
225 | |||
226 | case 'a': |
||
227 | dec_ch = '\a'; break; |
||
228 | case 'b': |
||
229 | dec_ch = '\b'; break; |
||
230 | case 'f': |
||
231 | dec_ch = '\f'; break; |
||
232 | case 'n': |
||
233 | dec_ch = '\n'; break; |
||
234 | case 'r': |
||
235 | dec_ch = '\r'; break; |
||
236 | case 't': |
||
237 | dec_ch = '\t'; break; |
||
238 | case 'v': |
||
239 | dec_ch = '\v'; break; |
||
240 | |||
241 | case '0': |
||
242 | dec_ch = 0; break; |
||
243 | |||
244 | case 'x': { |
||
245 | if (left - l < 4) { |
||
246 | BLog(BLOG_ERROR, "hexadecimal escape found in string but too little characters follow"); |
||
247 | goto string_fail1; |
||
248 | } |
||
249 | |||
250 | uintmax_t hex_val; |
||
251 | if (!parse_unsigned_hex_integer(MemRef_Make(&str[l + 2], 2), &hex_val)) { |
||
252 | BLog(BLOG_ERROR, "hexadecimal escape found in string but two hex characters don't follow"); |
||
253 | goto string_fail1; |
||
254 | } |
||
255 | |||
256 | dec_ch = hex_val; |
||
257 | extra = 2; |
||
258 | } break; |
||
259 | |||
260 | default: |
||
261 | BLog(BLOG_ERROR, "bad escape sequence in string"); |
||
262 | goto string_fail1; |
||
263 | } |
||
264 | |||
265 | l += 2 + extra; |
||
266 | } |
||
267 | else if (str[l] == '"') { |
||
268 | break; |
||
269 | } |
||
270 | else { |
||
271 | dec_ch = str[l]; |
||
272 | l++; |
||
273 | } |
||
274 | |||
275 | // append character to string |
||
276 | if (!ExpString_AppendByte(&estr, dec_ch)) { |
||
277 | BLog(BLOG_ERROR, "ExpString_AppendChar failed"); |
||
278 | goto string_fail1; |
||
279 | } |
||
280 | } |
||
281 | |||
282 | // make sure ending quote was found |
||
283 | if (l == left) { |
||
284 | BLog(BLOG_ERROR, "missing ending quote for string"); |
||
285 | goto string_fail1; |
||
286 | } |
||
287 | |||
288 | // skip ending quote |
||
289 | l++; |
||
290 | |||
291 | token = NCD_TOKEN_STRING; |
||
292 | token_val = ExpString_Get(&estr); |
||
293 | token_len = ExpString_Length(&estr); |
||
294 | break; |
||
295 | |||
296 | string_fail1: |
||
297 | ExpString_Free(&estr); |
||
298 | string_fail0: |
||
299 | error = 1; |
||
300 | } while (0); |
||
301 | else if (is_space_char(*str)) { |
||
302 | token = 0; |
||
303 | l = 1; |
||
304 | } |
||
305 | else { |
||
306 | BLog(BLOG_ERROR, "unrecognized character"); |
||
307 | error = 1; |
||
308 | } |
||
309 | |||
310 | out: |
||
311 | // report error |
||
312 | if (error) { |
||
313 | output(user, NCD_ERROR, NULL, 0, line, line_char); |
||
314 | return; |
||
315 | } |
||
316 | |||
317 | // output token |
||
318 | if (token) { |
||
319 | if (!output(user, token, token_val, token_len, line, line_char)) { |
||
320 | return; |
||
321 | } |
||
322 | } |
||
323 | |||
324 | // update line/char counters |
||
325 | for (size_t i = 0; i < l; i++) { |
||
326 | if (str[i] == '\n') { |
||
327 | line++; |
||
328 | line_char = 1; |
||
329 | } else { |
||
330 | line_char++; |
||
331 | } |
||
332 | } |
||
333 | |||
334 | str += l; |
||
335 | left -= l; |
||
336 | } |
||
337 | |||
338 | output(user, NCD_EOF, NULL, 0, line, line_char); |
||
339 | } |