BadVPN – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | <?php |
2 | /* |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU Library General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
||
16 | */ |
||
17 | |||
18 | |||
19 | define('LIME_CALL_PROTOCOL', '$tokens, &$result'); |
||
20 | |||
21 | abstract class lime_parser { |
||
22 | } |
||
23 | |||
24 | class parse_error extends Exception {} # If this happens, the input doesn't match the grammar. |
||
25 | class parse_bug extends Exception {} # If this happens, I made a mistake. |
||
26 | |||
27 | class parse_unexpected_token extends parse_error { |
||
28 | function __construct($type, $state) { |
||
29 | parent::__construct("Unexpected token of type ($type)"); |
||
30 | $this->type = $type; |
||
31 | $this->state = $state; |
||
32 | } |
||
33 | } |
||
34 | class parse_premature_eof extends parse_error { |
||
35 | function __construct() { |
||
36 | parent::__construct("Premature EOF"); |
||
37 | } |
||
38 | } |
||
39 | |||
40 | |||
41 | class parse_stack { |
||
42 | function __construct($qi) { |
||
43 | $this->q = $qi; |
||
44 | $this->qs = array(); |
||
45 | $this->ss = array(); |
||
46 | } |
||
47 | function shift($q, $semantic) { |
||
48 | $this->ss[] = $semantic; |
||
49 | $this->qs[] = $this->q; |
||
50 | $this->q = $q; |
||
51 | # echo "Shift $q -- $semantic<br/>\n"; |
||
52 | } |
||
53 | function top_n($n) { |
||
54 | if (!$n) return array(); |
||
55 | return array_slice($this->ss, 0-$n); |
||
56 | } |
||
57 | function pop_n($n) { |
||
58 | if (!$n) return array(); |
||
59 | $qq = array_splice($this->qs, 0-$n); |
||
60 | $this->q = $qq[0]; |
||
61 | return array_splice($this->ss, 0-$n); |
||
62 | } |
||
63 | function occupied() { return !empty($this->ss); } |
||
64 | function index($n) { |
||
65 | if ($n) $this->q = $this->qs[count($this->qs)-$n]; |
||
66 | } |
||
67 | function text() { |
||
68 | return $this->q." : ".implode(' . ', array_reverse($this->qs)); |
||
69 | } |
||
70 | } |
||
71 | class parse_engine { |
||
72 | function __construct($parser) { |
||
73 | $this->parser = $parser; |
||
74 | $this->qi = $parser->qi; |
||
75 | $this->rule = $parser->a; |
||
76 | $this->step = $parser->i; |
||
77 | #$this->prepare_callables(); |
||
78 | $this->reset(); |
||
79 | #$this->debug = false; |
||
80 | } |
||
81 | function reset() { |
||
82 | $this->accept = false; |
||
83 | $this->stack = new parse_stack($this->qi); |
||
84 | } |
||
85 | private function enter_error_tolerant_state() { |
||
86 | while ($this->stack->occupied()) { |
||
87 | if ($this->has_step_for('error')) return true; |
||
88 | $this->drop(); |
||
89 | }; |
||
90 | return false; |
||
91 | } |
||
92 | private function drop() { $this->stack->pop_n(1); } |
||
93 | function eat_eof() { |
||
94 | {/* |
||
95 | |||
96 | So that I don't get any brilliant misguided ideas: |
||
97 | |||
98 | The "accept" step happens when we try to eat a start symbol. |
||
99 | That happens because the reductions up the stack at the end |
||
100 | finally (and symetrically) tell the parser to eat a symbol |
||
101 | representing what they've just shifted off the end of the stack |
||
102 | and reduced. However, that doesn't put the parser into any |
||
103 | special different state. Therefore, it's back at the start |
||
104 | state. |
||
105 | |||
106 | That being said, the parser is ready to reduce an EOF to the |
||
107 | empty program, if given a grammar that allows them. |
||
108 | |||
109 | So anyway, if you literally tell the parser to eat an EOF |
||
110 | symbol, then after it's done reducing and accepting the prior |
||
111 | program, it's going to think it has another symbol to deal with. |
||
112 | That is the EOF symbol, which means to reduce the empty program, |
||
113 | accept it, and then continue trying to eat the terminal EOF. |
||
114 | |||
115 | This infinte loop quickly runs out of memory. |
||
116 | |||
117 | That's why the real EOF algorithm doesn't try to pretend that |
||
118 | EOF is a terminal. Like the invented start symbol, it's special. |
||
119 | |||
120 | Instead, we pretend to want to eat EOF, but never actually |
||
121 | try to get it into the parse stack. (It won't fit.) In short, |
||
122 | we look up what reduction is indicated at each step in the |
||
123 | process of rolling up the parse stack. |
||
124 | |||
125 | The repetition is because one reduction is not guaranteed to |
||
126 | cascade into another and clean up the entire parse stack. |
||
127 | Rather, it will instead shift each partial production as it |
||
128 | is forced to completion by the EOF lookahead. |
||
129 | */} |
||
130 | |||
131 | # We must reduce as if having read the EOF symbol |
||
132 | do { |
||
133 | # and we have to try at least once, because if nothing |
||
134 | # has ever been shifted, then the stack will be empty |
||
135 | # at the start. |
||
136 | list($opcode, $operand) = $this->step_for('#'); |
||
137 | switch ($opcode) { |
||
138 | case 'r': $this->reduce($operand); break; |
||
139 | case 'e': $this->premature_eof(); break; |
||
140 | default: throw new parse_bug(); break; |
||
141 | } |
||
142 | } while ($this->stack->occupied()); |
||
143 | {/* |
||
144 | If the sentence is well-formed according to the grammar, then |
||
145 | this will eventually result in eating a start symbol, which |
||
146 | causes the "accept" instruction to fire. Otherwise, the |
||
147 | step('#') method will indicate an error in the syntax, which |
||
148 | here means a premature EOF. |
||
149 | |||
150 | Incedentally, some tremendous amount of voodoo with the parse |
||
151 | stack might help find the beginning of some unfinished |
||
152 | production that the sentence was cut off during, but as a |
||
153 | general rule that would require deeper knowledge. |
||
154 | */} |
||
155 | if (!$this->accept) throw new parse_bug(); |
||
156 | return $this->semantic; |
||
157 | } |
||
158 | private function premature_eof() { |
||
159 | $seen = array(); |
||
160 | while ($this->enter_error_tolerant_state()) { |
||
161 | if (isset($seen[$this->state()])) { |
||
162 | // This means that it's pointless to try here. |
||
163 | // We're guaranteed that the stack is occupied. |
||
164 | $this->drop(); |
||
165 | continue; |
||
166 | } |
||
167 | $seen[$this->state()] = true; |
||
168 | |||
169 | $this->eat('error', NULL); |
||
170 | if ($this->has_step_for('#')) { |
||
171 | // Good. We can continue as normal. |
||
172 | return; |
||
173 | } else { |
||
174 | // That attempt to resolve the error condition |
||
175 | // did not work. There's no point trying to |
||
176 | // figure out how much to slice off the stack. |
||
177 | // The rest of the algorithm will make it happen. |
||
178 | } |
||
179 | } |
||
180 | throw new parse_premature_eof(); |
||
181 | } |
||
182 | private function current_row() { return $this->step[$this->state()]; } |
||
183 | private function step_for($type) { |
||
184 | $row = $this->current_row(); |
||
185 | if (!isset($row[$type])) return array('e', $this->stack->q); |
||
186 | return explode(' ', $row[$type]); |
||
187 | } |
||
188 | private function has_step_for($type) { |
||
189 | $row = $this->current_row(); |
||
190 | return isset($row[$type]); |
||
191 | } |
||
192 | private function state() { return $this->stack->q; } |
||
193 | function eat($type, $semantic) { |
||
194 | # assert('$type == trim($type)'); |
||
195 | # if ($this->debug) echo "Trying to eat a ($type)\n"; |
||
196 | list($opcode, $operand) = $this->step_for($type); |
||
197 | switch ($opcode) { |
||
198 | case 's': |
||
199 | # if ($this->debug) echo "shift $type to state $operand\n"; |
||
200 | $this->stack->shift($operand, $semantic); |
||
201 | # echo $this->stack->text()." shift $type<br/>\n"; |
||
202 | break; |
||
203 | |||
204 | case 'r': |
||
205 | $this->reduce($operand); |
||
206 | $this->eat($type, $semantic); |
||
207 | # Yes, this is tail-recursive. It's also the simplest way. |
||
208 | break; |
||
209 | |||
210 | case 'a': |
||
211 | if ($this->stack->occupied()) throw new parse_bug('Accept should happen with empty stack.'); |
||
212 | $this->accept = true; |
||
213 | #if ($this->debug) echo ("Accept\n\n"); |
||
214 | $this->semantic = $semantic; |
||
215 | break; |
||
216 | |||
217 | case 'e': |
||
218 | # This is thought to be the uncommon, exceptional path, so |
||
219 | # it's OK that this algorithm will cause the stack to |
||
220 | # flutter while the parse engine waits for an edible token. |
||
221 | # if ($this->debug) echo "($type) causes a problem.\n"; |
||
222 | if ($this->enter_error_tolerant_state()) { |
||
223 | $this->eat('error', NULL); |
||
224 | if ($this->has_step_for($type)) $this->eat($type, $semantic); |
||
225 | } else { |
||
226 | # If that didn't work, give up: |
||
227 | throw new parse_error("Parse Error: ($type)($semantic) not expected"); |
||
228 | } |
||
229 | break; |
||
230 | |||
231 | default: |
||
232 | throw new parse_bug("Bad parse table instruction ".htmlspecialchars($opcode)); |
||
233 | } |
||
234 | } |
||
235 | private function reduce($rule_id) { |
||
236 | $rule = $this->rule[$rule_id]; |
||
237 | $len = $rule['len']; |
||
238 | $semantic = $this->perform_action($rule_id, $this->stack->top_n($len)); |
||
239 | #echo $semantic.br(); |
||
240 | if ($rule['replace']) $this->stack->pop_n($len); |
||
241 | else $this->stack->index($len); |
||
242 | $this->eat($rule['symbol'], $semantic); |
||
243 | } |
||
244 | private function perform_action($rule_id, $slice) { |
||
245 | # we have this weird calling convention.... |
||
246 | $result = null; |
||
247 | $method = $this->parser->method[$rule_id]; |
||
248 | #if ($this->debug) echo "rule $id: $method\n"; |
||
249 | $this->parser->$method($slice, $result); |
||
250 | return $result; |
||
251 | } |
||
252 | } |