corrade-vassal – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 vero 1 #region Header
2 /*
3 * Lexer.cs
4 * JSON lexer implementation based on a finite state machine.
5 *
6 * The authors disclaim copyright to this source code. For more details, see
7 * the COPYING file included with this distribution.
8 */
9 #endregion
10  
11  
12 using System;
13 using System.Collections.Generic;
14 using System.IO;
15 using System.Text;
16  
17  
18 namespace LitJson
19 {
/// <summary>
/// Mutable scratch data handed to every state handler of the lexer's
/// finite state machine.
/// </summary>
internal class FsmContext
{
    // Lexer whose input the state handlers read from and write to.
    public Lexer L;

    // State the machine switches to once the current handler returns.
    public int NextState;

    // Raised by a handler when a complete token has been recognised.
    public bool Return;

    // State to resume after an escape sequence inside a string is done.
    public int StateStack;
}
27  
28  
29 internal class Lexer
30 {
31 #region Fields
// Signature shared by all state handlers; a handler returns false when
// it meets a character it cannot accept.
private delegate bool StateHandler (FsmContext ctx);

// Tables shared by every lexer, both indexed by (state - 1).
private static StateHandler[] fsm_handler_table;
private static int[] fsm_return_table;

// Options.
private bool allow_comments;
private bool allow_single_quoted_strings;

// Input handling. input_buffer holds a single pushed-back character
// (0 means "nothing pending"); input_char is the last character read.
private TextReader reader;
private int input_buffer;
private int input_char;
private bool end_of_input;

// State machine bookkeeping.
private FsmContext fsm_context;
private int state;
private int unichar;

// Token under construction and the last token handed out.
private StringBuilder string_buffer;
private string string_value;
private int token;
49 #endregion
50  
51  
52 #region Properties
/// <summary>
/// Whether a '/' in the input may start a comment.
/// </summary>
public bool AllowComments
{
    get
    {
        return this.allow_comments;
    }
    set
    {
        this.allow_comments = value;
    }
}
57  
/// <summary>
/// Whether strings delimited by single quotes are accepted.
/// </summary>
public bool AllowSingleQuotedStrings
{
    get
    {
        return this.allow_single_quoted_strings;
    }
    set
    {
        this.allow_single_quoted_strings = value;
    }
}
62  
/// <summary>
/// True once the underlying reader has reported the end of its data.
/// </summary>
public bool EndOfInput
{
    get
    {
        return this.end_of_input;
    }
}
66  
/// <summary>
/// Token produced by the last successful call to NextToken: either a
/// ParserToken value or, for single-character tokens, the character.
/// </summary>
public int Token
{
    get
    {
        return this.token;
    }
}
70  
/// <summary>
/// Text collected for the last token returned by NextToken.
/// </summary>
public string StringValue
{
    get
    {
        return this.string_value;
    }
}
74 #endregion
75  
76  
77 #region Constructors
/// <summary>
/// Builds the shared state machine tables once, before first use.
/// </summary>
static Lexer ()
{
    Lexer.PopulateFsmTables ();
}
82  
/// <summary>
/// Creates a lexer that reads JSON text from <paramref name="reader"/>.
/// Comments and single-quoted strings are accepted by default.
/// </summary>
/// <param name="reader">Source of the characters to tokenize.</param>
public Lexer (TextReader reader)
{
    this.reader = reader;

    // Start in state 1 with nothing pushed back and nothing collected.
    this.state = 1;
    this.input_buffer = 0;
    this.end_of_input = false;
    this.string_buffer = new StringBuilder (128);

    this.allow_single_quoted_strings = true;
    this.allow_comments = true;

    FsmContext context = new FsmContext ();
    context.L = this;
    this.fsm_context = context;
}
97 #endregion
98  
99  
100 #region Static Methods
/// <summary>
/// Converts a hexadecimal digit character to its numeric value.
/// </summary>
/// <param name="digit">Character code of the digit.</param>
/// <returns>
/// 10..15 for 'a'..'f' and 'A'..'F'; otherwise the offset of
/// <paramref name="digit"/> from '0' (no validation is performed).
/// </returns>
private static int HexValue (int digit)
{
    if (digit >= 'a' && digit <= 'f')
        return 10 + (digit - 'a');

    if (digit >= 'A' && digit <= 'F')
        return 10 + (digit - 'A');

    return digit - '0';
}
132  
/// <summary>
/// Fills the two tables that drive the state machine. States are
/// numbered from 1, so the entry for state N lives at index N - 1 in
/// both tables.
/// </summary>
private static void PopulateFsmTables ()
{
    // Handler invoked while the machine is in the given state.
    fsm_handler_table = new StateHandler[28] {
        State1,
        State2,
        State3,
        State4,
        State5,
        State6,
        State7,
        State8,
        State9,
        State10,
        State11,
        State12,
        State13,
        State14,
        State15,
        State16,
        State17,
        State18,
        State19,
        State20,
        State21,
        State22,
        State23,
        State24,
        State25,
        State26,
        State27,
        State28
    };

    // Token reported when a handler finishes a token in the given
    // state; 0 marks states that never finish a token. The list must
    // hold exactly 28 values to match the declared length and the
    // handler table above.
    fsm_return_table = new int[28] {
        (int) ParserToken.Char,     // state 1
        0,                          // state 2
        (int) ParserToken.Number,   // state 3
        (int) ParserToken.Number,   // state 4
        0,                          // state 5
        (int) ParserToken.Number,   // state 6
        0,                          // state 7
        (int) ParserToken.Number,   // state 8
        0,                          // state 9
        0,                          // state 10
        (int) ParserToken.True,     // state 11
        0,                          // state 12
        0,                          // state 13
        0,                          // state 14
        (int) ParserToken.False,    // state 15
        0,                          // state 16
        0,                          // state 17
        (int) ParserToken.Null,     // state 18
        (int) ParserToken.CharSeq,  // state 19
        (int) ParserToken.Char,     // state 20
        0,                          // state 21
        0,                          // state 22
        (int) ParserToken.CharSeq,  // state 23
        (int) ParserToken.Char,     // state 24
        0,                          // state 25
        0,                          // state 26
        0,                          // state 27
        0                           // state 28
    };
}
197  
/// <summary>
/// Maps the character following a backslash to the character it
/// stands for.
/// </summary>
/// <param name="esc_char">Character code read after the backslash.</param>
/// <returns>
/// The decoded character, or '?' for a code that is not a known escape.
/// </returns>
private static char ProcessEscChar (int esc_char)
{
    switch (esc_char) {
    case 'b':
        return '\b';

    case 'f':
        return '\f';

    case 'n':
        return '\n';

    case 'r':
        return '\r';

    case 't':
        return '\t';

    // These escapes stand for themselves.
    case '/':
    case '\\':
    case '\'':
    case '"':
        return (char) esc_char;
    }

    // Unreachable
    return '?';
}
227  
/// <summary>
/// State 1: skips whitespace and dispatches on the first significant
/// character. Structural characters and opening quotes finish a token
/// here; anything else only selects the next state.
/// </summary>
/// <returns>false when the character cannot begin any token.</returns>
private static bool State1 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        // Space and the control range TAB..CR are skipped.
        if (c == ' ' || (c >= '\t' && c <= '\r'))
            continue;

        // First character of a number is kept as part of its text.
        if (c == '-' || (c >= '0' && c <= '9')) {
            lexer.string_buffer.Append ((char) c);

            if (c == '-')
                ctx.NextState = 2;
            else if (c == '0')
                ctx.NextState = 4;
            else
                ctx.NextState = 3;

            return true;
        }

        if (c == '"') {
            ctx.Return = true;
            ctx.NextState = 19;
            return true;
        }

        if (c == ',' || c == ':' || c == '[' || c == ']' ||
            c == '{' || c == '}') {
            ctx.Return = true;
            ctx.NextState = 1;
            return true;
        }

        if (c == 't') {
            ctx.NextState = 9;
            return true;
        }

        if (c == 'f') {
            ctx.NextState = 12;
            return true;
        }

        if (c == 'n') {
            ctx.NextState = 16;
            return true;
        }

        if (c == '\'') {
            if (! lexer.allow_single_quoted_strings)
                return false;

            // Reported to the caller as if it were a double quote.
            lexer.input_char = '"';
            ctx.Return = true;
            ctx.NextState = 23;
            return true;
        }

        if (c == '/') {
            if (! lexer.allow_comments)
                return false;

            ctx.NextState = 25;
            return true;
        }

        return false;
    }

    return true;
}
302  
/// <summary>
/// State 2: a '-' has been read; the next character must be a digit.
/// </summary>
/// <returns>false when the character is not a digit.</returns>
private static bool State2 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    if (c < '0' || c > '9')
        return false;

    lexer.string_buffer.Append ((char) c);

    // A leading zero is handled by its own state.
    ctx.NextState = (c == '0') ? 4 : 3;
    return true;
}
323  
/// <summary>
/// State 3: collects the digits of a number that started with a
/// non-zero digit. Whitespace or one of ", ] }" ends the number (the
/// delimiter is pushed back); '.' and 'e'/'E' continue it.
/// </summary>
/// <returns>false on any other character.</returns>
private static bool State3 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c >= '0' && c <= '9') {
            lexer.string_buffer.Append ((char) c);
            continue;
        }

        if (c == ' ' || (c >= '\t' && c <= '\r')) {
            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }

        if (c == ',' || c == ']' || c == '}') {
            lexer.UngetChar ();
            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }

        if (c == '.') {
            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 5;
            return true;
        }

        if (c == 'e' || c == 'E') {
            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 7;
            return true;
        }

        return false;
    }

    return true;
}
365  
/// <summary>
/// State 4: a leading '0' has been read. Only a terminator
/// (whitespace or ", ] }"), a '.' or an exponent marker may follow.
/// </summary>
/// <returns>false on any other character.</returns>
private static bool State4 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    if (c == ' ' || (c >= '\t' && c <= '\r')) {
        ctx.NextState = 1;
        ctx.Return = true;
        return true;
    }

    if (c == ',' || c == ']' || c == '}') {
        lexer.UngetChar ();
        ctx.NextState = 1;
        ctx.Return = true;
        return true;
    }

    if (c == '.') {
        lexer.string_buffer.Append ((char) c);
        ctx.NextState = 5;
        return true;
    }

    if (c == 'e' || c == 'E') {
        lexer.string_buffer.Append ((char) c);
        ctx.NextState = 7;
        return true;
    }

    return false;
}
401  
/// <summary>
/// State 5: a '.' has been read; at least one digit must follow.
/// </summary>
/// <returns>false when the next character is not a digit.</returns>
private static bool State5 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    if (c < '0' || c > '9')
        return false;

    lexer.string_buffer.Append ((char) c);
    ctx.NextState = 6;
    return true;
}
414  
/// <summary>
/// State 6: collects the remaining digits after the decimal point.
/// Whitespace or one of ", ] }" ends the number (the delimiter is
/// pushed back); 'e'/'E' moves on to the exponent.
/// </summary>
/// <returns>false on any other character.</returns>
private static bool State6 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c >= '0' && c <= '9') {
            lexer.string_buffer.Append ((char) c);
            continue;
        }

        if (c == ' ' || (c >= '\t' && c <= '\r')) {
            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }

        if (c == ',' || c == ']' || c == '}') {
            lexer.UngetChar ();
            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }

        if (c == 'e' || c == 'E') {
            lexer.string_buffer.Append ((char) c);
            ctx.NextState = 7;
            return true;
        }

        return false;
    }

    return true;
}
452  
/// <summary>
/// State 7: an exponent marker has been read; a digit or a sign must
/// follow. Either one is kept and leads to state 8.
/// </summary>
/// <returns>false on any other character.</returns>
private static bool State7 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    bool is_digit = c >= '0' && c <= '9';
    bool is_sign = c == '+' || c == '-';

    if (! is_digit && ! is_sign)
        return false;

    lexer.string_buffer.Append ((char) c);
    ctx.NextState = 8;
    return true;
}
474  
/// <summary>
/// State 8: collects the digits of the exponent. Whitespace or one of
/// ", ] }" ends the number (the delimiter is pushed back).
/// </summary>
/// <returns>false on any other character.</returns>
private static bool State8 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c >= '0' && c <= '9') {
            lexer.string_buffer.Append ((char) c);
            continue;
        }

        if (c == ' ' || (c >= '\t' && c <= '\r')) {
            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }

        if (c == ',' || c == ']' || c == '}') {
            lexer.UngetChar ();
            ctx.NextState = 1;
            ctx.Return = true;
            return true;
        }

        return false;
    }

    return true;
}
506  
/// <summary>
/// State 9: 't' has been read; expects the 'r' of "true".
/// </summary>
private static bool State9 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'r')
        return false;

    ctx.NextState = 10;
    return true;
}
520  
/// <summary>
/// State 10: "tr" has been read; expects the 'u' of "true".
/// </summary>
private static bool State10 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'u')
        return false;

    ctx.NextState = 11;
    return true;
}
534  
/// <summary>
/// State 11: "tru" has been read; the final 'e' completes the token.
/// </summary>
private static bool State11 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'e')
        return false;

    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
549  
/// <summary>
/// State 12: 'f' has been read; expects the 'a' of "false".
/// </summary>
private static bool State12 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'a')
        return false;

    ctx.NextState = 13;
    return true;
}
563  
/// <summary>
/// State 13: "fa" has been read; expects the 'l' of "false".
/// </summary>
private static bool State13 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'l')
        return false;

    ctx.NextState = 14;
    return true;
}
577  
/// <summary>
/// State 14: "fal" has been read; expects the 's' of "false".
/// </summary>
private static bool State14 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 's')
        return false;

    ctx.NextState = 15;
    return true;
}
591  
/// <summary>
/// State 15: "fals" has been read; the final 'e' completes the token.
/// </summary>
private static bool State15 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'e')
        return false;

    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
606  
/// <summary>
/// State 16: 'n' has been read; expects the 'u' of "null".
/// </summary>
private static bool State16 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'u')
        return false;

    ctx.NextState = 17;
    return true;
}
620  
/// <summary>
/// State 17: "nu" has been read; expects the first 'l' of "null".
/// </summary>
private static bool State17 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'l')
        return false;

    ctx.NextState = 18;
    return true;
}
634  
/// <summary>
/// State 18: "nul" has been read; the final 'l' completes the token.
/// </summary>
private static bool State18 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != 'l')
        return false;

    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
649  
/// <summary>
/// State 19: body of a double-quoted string. Characters are collected
/// until the closing quote, which is pushed back so that state 20 can
/// consume it. A backslash hands over to state 21 and comes back here
/// afterwards.
/// </summary>
private static bool State19 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c == '"') {
            lexer.UngetChar ();
            ctx.NextState = 20;
            ctx.Return = true;
            return true;
        }

        if (c == '\\') {
            // Remember where to resume after the escape sequence.
            ctx.StateStack = 19;
            ctx.NextState = 21;
            return true;
        }

        lexer.string_buffer.Append ((char) c);
    }

    return true;
}
673  
/// <summary>
/// State 20: consumes the closing double quote of a string and
/// reports it as a token.
/// </summary>
private static bool State20 (FsmContext ctx)
{
    ctx.L.GetChar ();

    if (ctx.L.input_char != '"')
        return false;

    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
688  
/// <summary>
/// State 21: a backslash has been read inside a string. 'u' starts a
/// four-digit escape (state 22); a simple escape is decoded, appended
/// and control returns to the string state saved in StateStack.
/// </summary>
/// <returns>false for an unknown escape character.</returns>
private static bool State21 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();
    int c = lexer.input_char;

    if (c == 'u') {
        ctx.NextState = 22;
        return true;
    }

    bool simple_escape =
        c == '"' || c == '\'' || c == '/' || c == '\\' ||
        c == 'b' || c == 'f' || c == 'n' || c == 'r' || c == 't';

    if (! simple_escape)
        return false;

    lexer.string_buffer.Append (ProcessEscChar (c));
    ctx.NextState = ctx.StateStack;
    return true;
}
716  
/// <summary>
/// State 22: reads the four hexadecimal digits of a "\u" escape,
/// appends the resulting character and returns to the string state
/// saved in StateStack.
/// </summary>
/// <returns>false when a non-hexadecimal character is met.</returns>
private static bool State22 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.unichar = 0;

    // Most significant digit first: weights 4096, 256, 16, 1.
    for (int shift = 12; shift >= 0; shift -= 4) {
        if (! lexer.GetChar ())
            return true;

        int c = lexer.input_char;

        bool hex_digit =
            (c >= '0' && c <= '9') ||
            (c >= 'A' && c <= 'F') ||
            (c >= 'a' && c <= 'f');

        if (! hex_digit)
            return false;

        lexer.unichar += HexValue (c) * (1 << shift);
    }

    lexer.string_buffer.Append (Convert.ToChar (lexer.unichar));
    ctx.NextState = ctx.StateStack;
    return true;
}
750  
/// <summary>
/// State 23: body of a single-quoted string. Characters are collected
/// until the closing quote, which is pushed back so that state 24 can
/// consume it. A backslash hands over to state 21 and comes back here
/// afterwards.
/// </summary>
private static bool State23 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c == '\'') {
            lexer.UngetChar ();
            ctx.NextState = 24;
            ctx.Return = true;
            return true;
        }

        if (c == '\\') {
            // Remember where to resume after the escape sequence.
            ctx.StateStack = 23;
            ctx.NextState = 21;
            return true;
        }

        lexer.string_buffer.Append ((char) c);
    }

    return true;
}
774  
/// <summary>
/// State 24: consumes the closing single quote of a string and
/// reports it as a token, rewritten to a double quote.
/// </summary>
private static bool State24 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    lexer.GetChar ();

    if (lexer.input_char != '\'')
        return false;

    lexer.input_char = '"';
    ctx.NextState = 1;
    ctx.Return = true;
    return true;
}
790  
/// <summary>
/// State 25: a '/' has been read. A second '/' starts a line comment
/// (state 26) and a '*' starts a block comment (state 27).
/// </summary>
/// <returns>false on any other character.</returns>
private static bool State25 (FsmContext ctx)
{
    ctx.L.GetChar ();
    int c = ctx.L.input_char;

    if (c == '/') {
        ctx.NextState = 26;
        return true;
    }

    if (c == '*') {
        ctx.NextState = 27;
        return true;
    }

    return false;
}
808  
/// <summary>
/// State 26: inside a line comment; discards input up to and
/// including the next line feed, then goes back to state 1.
/// </summary>
private static bool State26 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        if (lexer.input_char != '\n')
            continue;

        ctx.NextState = 1;
        break;
    }

    return true;
}
820  
/// <summary>
/// State 27: inside a block comment; discards input until a '*' is
/// seen, then lets state 28 check for the closing '/'.
/// </summary>
private static bool State27 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        if (lexer.input_char != '*')
            continue;

        ctx.NextState = 28;
        break;
    }

    return true;
}
832  
/// <summary>
/// State 28: a '*' has been read inside a block comment. Further
/// '*' characters are skipped; a '/' ends the comment (state 1) and
/// anything else resumes the comment body (state 27).
/// </summary>
private static bool State28 (FsmContext ctx)
{
    Lexer lexer = ctx.L;

    while (lexer.GetChar ()) {
        int c = lexer.input_char;

        if (c == '*')
            continue;

        ctx.NextState = (c == '/') ? 1 : 27;
        return true;
    }

    return true;
}
850 #endregion
851  
852  
/// <summary>
/// Reads the next character into input_char.
/// </summary>
/// <returns>
/// true when a character was read; false when the input is exhausted,
/// in which case end_of_input is set as well.
/// </returns>
private bool GetChar ()
{
    input_char = NextChar ();

    if (input_char == -1) {
        end_of_input = true;
        return false;
    }

    return true;
}
861  
/// <summary>
/// Returns the pushed-back character if there is one, otherwise the
/// next character from the reader.
/// </summary>
private int NextChar ()
{
    // Zero marks an empty push-back slot.
    if (input_buffer == 0)
        return reader.Read ();

    int pending = input_buffer;
    input_buffer = 0;
    return pending;
}
873  
/// <summary>
/// Runs the state machine until a token is complete or the input
/// ends. On success Token and StringValue describe the token.
/// </summary>
/// <returns>
/// true when a token was produced; false when the end of the input
/// was reached first.
/// </returns>
/// <exception cref="JsonException">
/// A state handler rejected the current input character.
/// </exception>
public bool NextToken ()
{
    fsm_context.Return = false;

    for (;;) {
        // States are 1-based, the tables are 0-based.
        if (! fsm_handler_table[state - 1] (fsm_context))
            throw new JsonException (input_char);

        if (end_of_input)
            return false;

        if (! fsm_context.Return) {
            state = fsm_context.NextState;
            continue;
        }

        // Hand out the collected text and empty the buffer.
        string_value = string_buffer.ToString ();
        string_buffer.Length = 0;

        // Single-character tokens are reported as the character itself.
        int produced = fsm_return_table[state - 1];
        token = (produced == (int) ParserToken.Char)
            ? input_char
            : produced;

        state = fsm_context.NextState;
        return true;
    }
}
904  
/// <summary>
/// Pushes the current character back so that the next read returns
/// it again. Only one character can be pending at a time.
/// </summary>
private void UngetChar ()
{
    this.input_buffer = this.input_char;
}
909 }
910 }