WebSVN – BadVPN – Blame – Rev 1 – /ncd/NCDConfigTokenizer.c

1

office

1

/**
 * @file NCDConfigTokenizer.c
 * @author Ambroz Bizjak <ambrop7@gmail.com>
 *
 * @section LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

29

30

#include <string.h>

31

#include <stddef.h>

32

#include <stdlib.h>

33

34

#include <misc/debug.h>

35

#include <misc/string_begins_with.h>

36

#include <misc/balloc.h>

37

#include <misc/expstring.h>

38

#include <misc/parse_number.h>

39

#include <base/BLog.h>

40

41

#include <ncd/NCDConfigTokenizer.h>

42

43

#include <generated/blog_channel_NCDConfigTokenizer.h>

44

45

/**
 * Tells whether a character may appear inside a name token
 * (at any position after the first).
 *
 * @param c character to classify
 * @return 1 if c lies in 'a'..'z', 'A'..'Z' or '0'..'9', or is '_';
 *         0 otherwise
 */
static int is_name_char (char c)
{
    return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_');
}

49

50

/**
 * Tells whether a character may start a name token.
 * Unlike is_name_char, decimal digits are not accepted here.
 *
 * @param c character to classify
 * @return 1 if c lies in 'a'..'z' or 'A'..'Z', or is '_'; 0 otherwise
 */
static int is_name_first_char (char c)
{
    return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_');
}

54

55

/**
 * Tells whether a character is treated as whitespace by the tokenizer.
 *
 * @param c character to classify
 * @return 1 if c is a space, horizontal tab, line feed or carriage return;
 *         0 otherwise
 */
static int is_space_char (char c)
{
    return (c == ' ' || c == '\t' || c == '\n' || c == '\r');
}

59

60

/**
 * Compares a length-delimited byte sequence with a null-terminated string.
 *
 * @param str pointer to the data to compare; need not be null-terminated
 * @param str_len number of bytes at str; a negative value never matches
 * @param needle null-terminated string to compare against
 * @return 1 if str_len equals the length of needle and the first str_len
 *         bytes of str are identical to needle; 0 otherwise
 */
static int string_equals (char const *str, int str_len, char const *needle)
{
    // Reject negative lengths explicitly rather than relying on the
    // implicit conversion to size_t in the comparison below.
    if (str_len < 0) {
        return 0;
    }
    
    size_t len = (size_t)str_len;
    
    return (len == strlen(needle) && memcmp(str, needle, len) == 0);
}

64

65

void NCDConfigTokenizer_Tokenize (MemRef the_str, NCDConfigTokenizer_output output, void *user)

66

{

67

char const *str = the_str.ptr;

68

size_t left = the_str.len;

69

70

size_t line = 1;

71

size_t line_char = 1;

while (left > 0) {

size_t l;

int error = 0;

int token;

void *token_val = NULL;

78

size_t token_len = 0;

if (*str == '#') {

l = 1;

while (l < left && str[l] != '\n') {

l++;

}

token = 0;

}

else if (l = data_begins_with(str, left, "{")) {

88

token = NCD_TOKEN_CURLY_OPEN;

89

}

90

else if (l = data_begins_with(str, left, "}")) {

91

token = NCD_TOKEN_CURLY_CLOSE;

92

}

93

else if (l = data_begins_with(str, left, "(")) {

94

token = NCD_TOKEN_ROUND_OPEN;

95

}

96

else if (l = data_begins_with(str, left, ")")) {

97

token = NCD_TOKEN_ROUND_CLOSE;

98

}

99

else if (l = data_begins_with(str, left, ";")) {

100

token = NCD_TOKEN_SEMICOLON;

101

}

102

else if (l = data_begins_with(str, left, ".")) {

103

token = NCD_TOKEN_DOT;

104

}

105

else if (l = data_begins_with(str, left, ",")) {

106

token = NCD_TOKEN_COMMA;

107

}

108

else if (l = data_begins_with(str, left, ":")) {

109

token = NCD_TOKEN_COLON;

110

}

111

else if (l = data_begins_with(str, left, "[")) {

112

token = NCD_TOKEN_BRACKET_OPEN;

113

}

114

else if (l = data_begins_with(str, left, "]")) {

115

token = NCD_TOKEN_BRACKET_CLOSE;

116

}

117

else if (l = data_begins_with(str, left, "@")) {

118

token = NCD_TOKEN_AT;

119

}

120

else if (l = data_begins_with(str, left, "^")) {

121

token = NCD_TOKEN_CARET;

122

}

123

else if (l = data_begins_with(str, left, "->")) {

124

token = NCD_TOKEN_ARROW;

125

}

126

else if (l = data_begins_with(str, left, "If")) {

127

token = NCD_TOKEN_IF;

128

}

129

else if (l = data_begins_with(str, left, "Elif")) {

130

token = NCD_TOKEN_ELIF;

131

}

132

else if (l = data_begins_with(str, left, "elif")) {

133

token = NCD_TOKEN_ELIF;

134

}

135

else if (l = data_begins_with(str, left, "Else")) {

136

token = NCD_TOKEN_ELSE;

137

}

138

else if (l = data_begins_with(str, left, "else")) {

139

token = NCD_TOKEN_ELSE;

140

}

141

else if (l = data_begins_with(str, left, "Foreach")) {

142

token = NCD_TOKEN_FOREACH;

143

}

144

else if (l = data_begins_with(str, left, "As")) {

145

token = NCD_TOKEN_AS;

146

}

147

else if (l = data_begins_with(str, left, "Block")) {

148

token = NCD_TOKEN_BLOCK;

149

}

150

else if (l = data_begins_with(str, left, "Do")) {

151

token = NCD_TOKEN_DO;

152

}

153

else if (l = data_begins_with(str, left, "Interrupt")) {

154

token = NCD_TOKEN_INTERRUPT;

155

}

156

else if (l = data_begins_with(str, left, "include_guard")) {

157

token = NCD_TOKEN_INCLUDE_GUARD;

158

}

159

else if (l = data_begins_with(str, left, "include")) {

160

token = NCD_TOKEN_INCLUDE;

161

}

162

else if (is_name_first_char(*str)) {

163

l = 1;

164

while (l < left && is_name_char(str[l])) {

l++;

}

// allocate buffer

bsize_t bufsize = bsize_add(bsize_fromsize(l), bsize_fromint(1));

170

char *buf;

171

if (bufsize.is_overflow || !(buf = malloc(bufsize.value))) {

172

BLog(BLOG_ERROR, "malloc failed");

error = 1;

goto out;

}

// copy and terminate

178

memcpy(buf, str, l);

179

buf[l] = '\0';

180

181

if (!strcmp(buf, "process")) {

182

token = NCD_TOKEN_PROCESS;

183

free(buf);

184

}

185

else if (!strcmp(buf, "template")) {

186

token = NCD_TOKEN_TEMPLATE;

free(buf);

}

else {

token = NCD_TOKEN_NAME;

token_val = buf;

token_len = l;

}

}

else if (*str == '"') do {

196

// init string

197

ExpString estr;

198

if (!ExpString_Init(&estr)) {

199

BLog(BLOG_ERROR, "ExpString_Init failed");

goto string_fail0;

}

// skip start quote

l = 1;

// decode string

while (l < left) {

uint8_t dec_ch;

// get character

if (str[l] == '\\') {

212

if (left - l < 2) {

213

BLog(BLOG_ERROR, "escape character found in string but nothing follows");

goto string_fail1;

}

size_t extra = 0;

switch (str[l + 1]) {

case '\'':

case '\"':

case '\\':

case '\?':

dec_ch = str[l + 1]; break;

225

226

case 'a':

227

dec_ch = '\a'; break;

228

case 'b':

229

dec_ch = '\b'; break;

230

case 'f':

231

dec_ch = '\f'; break;

232

case 'n':

233

dec_ch = '\n'; break;

234

case 'r':

235

dec_ch = '\r'; break;

236

case 't':

237

dec_ch = '\t'; break;

238

case 'v':

239

dec_ch = '\v'; break;

case '0':

dec_ch = 0; break;

case 'x': {

if (left - l < 4) {

246

BLog(BLOG_ERROR, "hexadecimal escape found in string but too little characters follow");

goto string_fail1;

}

uintmax_t hex_val;

if (!parse_unsigned_hex_integer(MemRef_Make(&str[l + 2], 2), &hex_val)) {

252

                                BLog(BLOG_ERROR, "hexadecimal escape found in string but two hex characters don't follow");

goto string_fail1;

}

dec_ch = hex_val;

extra = 2;

} break;

default:

BLog(BLOG_ERROR, "bad escape sequence in string");

goto string_fail1;

}

l += 2 + extra;

}

else if (str[l] == '"') {

break;

}

else {

dec_ch = str[l];

l++;

}

// append character to string

276

if (!ExpString_AppendByte(&estr, dec_ch)) {

277

BLog(BLOG_ERROR, "ExpString_AppendChar failed");

goto string_fail1;

}

}

// make sure ending quote was found

283

if (l == left) {

284

BLog(BLOG_ERROR, "missing ending quote for string");

goto string_fail1;

}

// skip ending quote

289

l++;

290

291

token = NCD_TOKEN_STRING;

292

token_val = ExpString_Get(&estr);

293

token_len = ExpString_Length(&estr);

break;

string_fail1:

ExpString_Free(&estr);

string_fail0:

error = 1;

} while (0);

else if (is_space_char(*str)) {

token = 0;

l = 1;

}

else {

BLog(BLOG_ERROR, "unrecognized character");

error = 1;

}

out:

// report error

if (error) {

output(user, NCD_ERROR, NULL, 0, line, line_char);

return;

}

// output token

if (token) {

if (!output(user, token, token_val, token_len, line, line_char)) {

return;

}

}

// update line/char counters

325

for (size_t i = 0; i < l; i++) {

326

if (str[i] == '\n') {

line++;

line_char = 1;

} else {

line_char++;

}

}

str += l;

left -= l;

}

output(user, NCD_EOF, NULL, 0, line, line_char);

339

}

BadVPN – Blame information for rev 1