BadVPN – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 /**
2 * @file NCDConfigTokenizer.c
3 * @author Ambroz Bizjak <ambrop7@gmail.com>
4 *
5 * @section LICENSE
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the author nor the
15 * names of its contributors may be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29  
30 #include <string.h>
31 #include <stddef.h>
32 #include <stdlib.h>
33  
34 #include <misc/debug.h>
35 #include <misc/string_begins_with.h>
36 #include <misc/balloc.h>
37 #include <misc/expstring.h>
38 #include <misc/parse_number.h>
39 #include <base/BLog.h>
40  
41 #include <ncd/NCDConfigTokenizer.h>
42  
43 #include <generated/blog_channel_NCDConfigTokenizer.h>
44  
/**
 * Tells whether a character may appear inside a name token
 * (any position after the first).
 *
 * Accepts ASCII letters, ASCII digits and the underscore. The ranges are
 * compared explicitly rather than through <ctype.h>, so the result does not
 * depend on the current locale.
 *
 * @param c character to classify
 * @return 1 if the character is a name character, 0 otherwise
 */
static int is_name_char (char c)
{
    if (c == '_') {
        return 1;
    }
    if ('0' <= c && c <= '9') {
        return 1;
    }
    if ('a' <= c && c <= 'z') {
        return 1;
    }
    return ('A' <= c && c <= 'Z');
}
49  
/**
 * Tells whether a character may start a name token.
 *
 * Accepts ASCII letters and the underscore; digits are not accepted here.
 *
 * @param c character to classify
 * @return 1 if the character can begin a name, 0 otherwise
 */
static int is_name_first_char (char c)
{
    int const is_lower = ('a' <= c && c <= 'z');
    int const is_upper = ('A' <= c && c <= 'Z');

    return (is_lower || is_upper || c == '_');
}
54  
/**
 * Tells whether a character is whitespace for the tokenizer.
 *
 * Only space, horizontal tab, line feed and carriage return qualify.
 *
 * @param c character to classify
 * @return 1 if the character is whitespace, 0 otherwise
 */
static int is_space_char (char c)
{
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return 1;
        default:
            return 0;
    }
}
59  
/**
 * Compares a length-delimited byte span with a null-terminated string.
 *
 * The span does not have to be null-terminated; exactly str_len bytes of it
 * are examined, and only when the lengths already agree.
 *
 * @param str start of the span to compare
 * @param str_len number of bytes in the span; a negative value never matches
 * @param needle null-terminated string to compare against
 * @return 1 if the span has the same length and bytes as needle, 0 otherwise
 */
static int string_equals (const char *str, int str_len, const char *needle)
{
    // Reject negative lengths explicitly instead of relying on the implicit
    // int -> size_t conversion in the comparison and in memcmp.
    if (str_len < 0) {
        return 0;
    }

    size_t len = (size_t)str_len;

    return (len == strlen(needle) && memcmp(str, needle, len) == 0);
}
64  
65 void NCDConfigTokenizer_Tokenize (MemRef the_str, NCDConfigTokenizer_output output, void *user)
66 {
67 char const *str = the_str.ptr;
68 size_t left = the_str.len;
69  
70 size_t line = 1;
71 size_t line_char = 1;
72  
73 while (left > 0) {
74 size_t l;
75 int error = 0;
76 int token;
77 void *token_val = NULL;
78 size_t token_len = 0;
79  
80 if (*str == '#') {
81 l = 1;
82 while (l < left && str[l] != '\n') {
83 l++;
84 }
85 token = 0;
86 }
87 else if (l = data_begins_with(str, left, "{")) {
88 token = NCD_TOKEN_CURLY_OPEN;
89 }
90 else if (l = data_begins_with(str, left, "}")) {
91 token = NCD_TOKEN_CURLY_CLOSE;
92 }
93 else if (l = data_begins_with(str, left, "(")) {
94 token = NCD_TOKEN_ROUND_OPEN;
95 }
96 else if (l = data_begins_with(str, left, ")")) {
97 token = NCD_TOKEN_ROUND_CLOSE;
98 }
99 else if (l = data_begins_with(str, left, ";")) {
100 token = NCD_TOKEN_SEMICOLON;
101 }
102 else if (l = data_begins_with(str, left, ".")) {
103 token = NCD_TOKEN_DOT;
104 }
105 else if (l = data_begins_with(str, left, ",")) {
106 token = NCD_TOKEN_COMMA;
107 }
108 else if (l = data_begins_with(str, left, ":")) {
109 token = NCD_TOKEN_COLON;
110 }
111 else if (l = data_begins_with(str, left, "[")) {
112 token = NCD_TOKEN_BRACKET_OPEN;
113 }
114 else if (l = data_begins_with(str, left, "]")) {
115 token = NCD_TOKEN_BRACKET_CLOSE;
116 }
117 else if (l = data_begins_with(str, left, "@")) {
118 token = NCD_TOKEN_AT;
119 }
120 else if (l = data_begins_with(str, left, "^")) {
121 token = NCD_TOKEN_CARET;
122 }
123 else if (l = data_begins_with(str, left, "->")) {
124 token = NCD_TOKEN_ARROW;
125 }
126 else if (l = data_begins_with(str, left, "If")) {
127 token = NCD_TOKEN_IF;
128 }
129 else if (l = data_begins_with(str, left, "Elif")) {
130 token = NCD_TOKEN_ELIF;
131 }
132 else if (l = data_begins_with(str, left, "elif")) {
133 token = NCD_TOKEN_ELIF;
134 }
135 else if (l = data_begins_with(str, left, "Else")) {
136 token = NCD_TOKEN_ELSE;
137 }
138 else if (l = data_begins_with(str, left, "else")) {
139 token = NCD_TOKEN_ELSE;
140 }
141 else if (l = data_begins_with(str, left, "Foreach")) {
142 token = NCD_TOKEN_FOREACH;
143 }
144 else if (l = data_begins_with(str, left, "As")) {
145 token = NCD_TOKEN_AS;
146 }
147 else if (l = data_begins_with(str, left, "Block")) {
148 token = NCD_TOKEN_BLOCK;
149 }
150 else if (l = data_begins_with(str, left, "Do")) {
151 token = NCD_TOKEN_DO;
152 }
153 else if (l = data_begins_with(str, left, "Interrupt")) {
154 token = NCD_TOKEN_INTERRUPT;
155 }
156 else if (l = data_begins_with(str, left, "include_guard")) {
157 token = NCD_TOKEN_INCLUDE_GUARD;
158 }
159 else if (l = data_begins_with(str, left, "include")) {
160 token = NCD_TOKEN_INCLUDE;
161 }
162 else if (is_name_first_char(*str)) {
163 l = 1;
164 while (l < left && is_name_char(str[l])) {
165 l++;
166 }
167  
168 // allocate buffer
169 bsize_t bufsize = bsize_add(bsize_fromsize(l), bsize_fromint(1));
170 char *buf;
171 if (bufsize.is_overflow || !(buf = malloc(bufsize.value))) {
172 BLog(BLOG_ERROR, "malloc failed");
173 error = 1;
174 goto out;
175 }
176  
177 // copy and terminate
178 memcpy(buf, str, l);
179 buf[l] = '\0';
180  
181 if (!strcmp(buf, "process")) {
182 token = NCD_TOKEN_PROCESS;
183 free(buf);
184 }
185 else if (!strcmp(buf, "template")) {
186 token = NCD_TOKEN_TEMPLATE;
187 free(buf);
188 }
189 else {
190 token = NCD_TOKEN_NAME;
191 token_val = buf;
192 token_len = l;
193 }
194 }
195 else if (*str == '"') do {
196 // init string
197 ExpString estr;
198 if (!ExpString_Init(&estr)) {
199 BLog(BLOG_ERROR, "ExpString_Init failed");
200 goto string_fail0;
201 }
202  
203 // skip start quote
204 l = 1;
205  
206 // decode string
207 while (l < left) {
208 uint8_t dec_ch;
209  
210 // get character
211 if (str[l] == '\\') {
212 if (left - l < 2) {
213 BLog(BLOG_ERROR, "escape character found in string but nothing follows");
214 goto string_fail1;
215 }
216  
217 size_t extra = 0;
218  
219 switch (str[l + 1]) {
220 case '\'':
221 case '\"':
222 case '\\':
223 case '\?':
224 dec_ch = str[l + 1]; break;
225  
226 case 'a':
227 dec_ch = '\a'; break;
228 case 'b':
229 dec_ch = '\b'; break;
230 case 'f':
231 dec_ch = '\f'; break;
232 case 'n':
233 dec_ch = '\n'; break;
234 case 'r':
235 dec_ch = '\r'; break;
236 case 't':
237 dec_ch = '\t'; break;
238 case 'v':
239 dec_ch = '\v'; break;
240  
241 case '0':
242 dec_ch = 0; break;
243  
244 case 'x': {
245 if (left - l < 4) {
246 BLog(BLOG_ERROR, "hexadecimal escape found in string but too little characters follow");
247 goto string_fail1;
248 }
249  
250 uintmax_t hex_val;
251 if (!parse_unsigned_hex_integer(MemRef_Make(&str[l + 2], 2), &hex_val)) {
252 BLog(BLOG_ERROR, "hexadecimal escape found in string but two hex characters don't follow");
253 goto string_fail1;
254 }
255  
256 dec_ch = hex_val;
257 extra = 2;
258 } break;
259  
260 default:
261 BLog(BLOG_ERROR, "bad escape sequence in string");
262 goto string_fail1;
263 }
264  
265 l += 2 + extra;
266 }
267 else if (str[l] == '"') {
268 break;
269 }
270 else {
271 dec_ch = str[l];
272 l++;
273 }
274  
275 // append character to string
276 if (!ExpString_AppendByte(&estr, dec_ch)) {
277 BLog(BLOG_ERROR, "ExpString_AppendChar failed");
278 goto string_fail1;
279 }
280 }
281  
282 // make sure ending quote was found
283 if (l == left) {
284 BLog(BLOG_ERROR, "missing ending quote for string");
285 goto string_fail1;
286 }
287  
288 // skip ending quote
289 l++;
290  
291 token = NCD_TOKEN_STRING;
292 token_val = ExpString_Get(&estr);
293 token_len = ExpString_Length(&estr);
294 break;
295  
296 string_fail1:
297 ExpString_Free(&estr);
298 string_fail0:
299 error = 1;
300 } while (0);
301 else if (is_space_char(*str)) {
302 token = 0;
303 l = 1;
304 }
305 else {
306 BLog(BLOG_ERROR, "unrecognized character");
307 error = 1;
308 }
309  
310 out:
311 // report error
312 if (error) {
313 output(user, NCD_ERROR, NULL, 0, line, line_char);
314 return;
315 }
316  
317 // output token
318 if (token) {
319 if (!output(user, token, token_val, token_len, line, line_char)) {
320 return;
321 }
322 }
323  
324 // update line/char counters
325 for (size_t i = 0; i < l; i++) {
326 if (str[i] == '\n') {
327 line++;
328 line_char = 1;
329 } else {
330 line_char++;
331 }
332 }
333  
334 str += l;
335 left -= l;
336 }
337  
338 output(user, NCD_EOF, NULL, 0, line, line_char);
339 }