BadVPN – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | <?php |
2 | /* |
||
3 | * This program is free software; you can redistribute it and/or modify |
||
4 | * it under the terms of the GNU General Public License as published by |
||
5 | * the Free Software Foundation; either version 2 of the License, or |
||
6 | * (at your option) any later version. |
||
7 | * |
||
8 | * This program is distributed in the hope that it will be useful, |
||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
11 | * GNU Library General Public License for more details. |
||
12 | * |
||
13 | * You should have received a copy of the GNU General Public License |
||
14 | * along with this program; if not, write to the Free Software |
||
15 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
||
16 | */ |
||
17 | |||
18 | define('LIME_DIR', dirname(__FILE__)); |
||
19 | |||
20 | function emit($str) { fputs(STDERR, $str."\n"); } |
||
21 | |||
22 | class Bug extends Exception {} |
||
23 | function bug($gripe='Bug found.') { throw new Bug($gripe); } |
||
24 | function bug_if($falacy, $gripe='Bug found.') { if ($falacy) throw new Bug($gripe); } |
||
25 | function bug_unless($assertion, $gripe='Bug found.') { if (!$assertion) throw new Bug($gripe); } |
||
26 | |||
27 | include_once(LIME_DIR.'/parse_engine.php'); |
||
28 | include_once(LIME_DIR.'/set.so.php'); |
||
29 | include_once(LIME_DIR.'/flex_token_stream.php'); |
||
30 | |||
31 | function lime_token_reference($pos) { return '$tokens['.$pos.']'; } |
||
32 | function lime_token_reference_callback($foo) { return lime_token_reference($foo[1]-1); } |
||
33 | |||
34 | class cf_action { |
||
35 | function __construct($code) { $this->code=$code; } |
||
36 | } |
||
37 | class step { |
||
38 | /* |
||
39 | Base class for parse table instructions. The main idea is to make the |
||
40 | subclasses responsible for conflict resolution among themselves. It also |
||
41 | forms a sort of interface to the parse table. |
||
42 | */ |
||
43 | function __construct($sym) { |
||
44 | bug_unless($sym instanceof sym); |
||
45 | $this->sym = $sym; |
||
46 | } |
||
47 | function glyph() { return $this->sym->name; } |
||
48 | } |
||
49 | class error extends step { |
||
50 | function sane() { return false; } |
||
51 | function instruction() { bug("This should not happen."); } |
||
52 | function decide($that) { return $this; /* An error shall remain one. */ } |
||
53 | } |
||
54 | class shift extends step { |
||
55 | function __construct($sym, $q) { |
||
56 | parent::__construct($sym); |
||
57 | $this->q = $q; |
||
58 | } |
||
59 | function sane() { return true; } |
||
60 | function instruction() { return "s $this->q"; } |
||
61 | function decide($that) { |
||
62 | # shift-shift conflicts are impossible. |
||
63 | # shift-accept conflicts are a bug. |
||
64 | # so we can infer: |
||
65 | bug_unless($that instanceof reduce); |
||
66 | |||
67 | # That being said, the resolution is a matter of precedence. |
||
68 | $shift_prec = $this->sym->right_prec; |
||
69 | $reduce_prec = $that->rule->prec; |
||
70 | |||
71 | # If we don't have defined precedence levels for both options, |
||
72 | # then we default to shifting: |
||
73 | if (!($shift_prec and $reduce_prec)) return $this; |
||
74 | |||
75 | # Otherwise, use the step with higher precedence. |
||
76 | if ($shift_prec > $reduce_prec) return $this; |
||
77 | if ($reduce_prec > $shift_prec) return $that; |
||
78 | |||
79 | # The "nonassoc" works by giving equal precedence to both options, |
||
80 | # which means to put an error instruction in the parse table. |
||
81 | return new error($this->sym); |
||
82 | } |
||
83 | } |
||
84 | class reduce extends step { |
||
85 | function __construct($sym, $rule) { |
||
86 | bug_unless($rule instanceof rule); |
||
87 | parent::__construct($sym); |
||
88 | $this->rule = $rule; |
||
89 | } |
||
90 | function sane() { return true; } |
||
91 | function instruction() { return 'r '.$this->rule->id; } |
||
92 | function decide($that) { |
||
93 | # This means that the input grammar has a reduce-reduce conflict. |
||
94 | # Such things are considered an error in the input. |
||
95 | throw new RRC($this, $that); |
||
96 | #exit(1); |
||
97 | # BISON would go with the first encountered reduce thus: |
||
98 | # return $this; |
||
99 | } |
||
100 | } |
||
101 | class accept extends step { |
||
102 | function __construct($sym) { parent::__construct($sym); } |
||
103 | function sane() { return true; } |
||
104 | function instruction() { return 'a '.$this->sym->name; } |
||
105 | } |
||
106 | class RRC extends Exception { |
||
107 | function __construct($a, $b) { |
||
108 | parent::__construct("Reduce-Reduce Conflict"); |
||
109 | $this->a = $a; |
||
110 | $this->b = $b; |
||
111 | } |
||
112 | function make_noise() { |
||
113 | emit(sprintf( |
||
114 | "Reduce-Reduce Conflict:\n%s\n%s\nLookahead is (%s)", |
||
115 | $this->a->rule->text(), |
||
116 | $this->b->rule->text(), |
||
117 | $this->a->glyph() |
||
118 | )); |
||
119 | } |
||
120 | } |
||
121 | class state { |
||
122 | function __construct($id, $key, $close) { |
||
123 | $this->id = $id; |
||
124 | $this->key = $key; |
||
125 | $this->close = $close; # config key -> object |
||
126 | ksort($this->close); |
||
127 | $this->action = array(); |
||
128 | } |
||
129 | function dump() { |
||
130 | echo " * ".$this->id.' / '.$this->key."\n"; |
||
131 | foreach ($this->close as $config) $config->dump(); |
||
132 | } |
||
133 | function add_shift($sym, $state) { |
||
134 | $this->add_instruction(new shift($sym, $state->id)); |
||
135 | } |
||
136 | function add_reduce($sym, $rule) { |
||
137 | $this->add_instruction(new reduce($sym, $rule)); |
||
138 | } |
||
139 | function add_accept($sym) { |
||
140 | $this->add_instruction(new accept($sym)); |
||
141 | } |
||
142 | function add_instruction($step) { |
||
143 | bug_unless($step instanceof step); |
||
144 | $this->action[] = $step; |
||
145 | } |
||
146 | function find_reductions($lime) { |
||
147 | # rightmost configurations followset yields reduce. |
||
148 | foreach($this->close as $c) { |
||
149 | if ($c->rightmost) { |
||
150 | foreach ($c->follow->all() as $glyph) $this->add_reduce($lime->sym($glyph), $c->rule); |
||
151 | } |
||
152 | } |
||
153 | } |
||
154 | function resolve_conflicts() { |
||
155 | # For each possible lookahead, find one (and only one) step to take. |
||
156 | $table = array(); |
||
157 | foreach ($this->action as $step) { |
||
158 | $glyph = $step->glyph(); |
||
159 | if (isset($table[$glyph])) { |
||
160 | # There's a conflict. The shifts all came first, which |
||
161 | # simplifies the coding for the step->decide() methods. |
||
162 | try { |
||
163 | $table[$glyph] = $table[$glyph]->decide($step); |
||
164 | } catch (RRC $e) { |
||
165 | emit("State $this->id:"); |
||
166 | $e->make_noise(); |
||
167 | } |
||
168 | } else { |
||
169 | # This glyph is yet unprocessed, so the step at hand is |
||
170 | # our best current guess at what the grammar indicates. |
||
171 | $table[$glyph] = $step; |
||
172 | } |
||
173 | } |
||
174 | |||
175 | # Now that we have the correct steps chosen, this routine is oddly |
||
176 | # also responsible for turning that table into the form that will |
||
177 | # eventually be passed to the parse engine. (So FIXME?) |
||
178 | $out = array(); |
||
179 | foreach ($table as $glyph => $step) { |
||
180 | if ($step->sane()) $out[$glyph] = $step->instruction(); |
||
181 | } |
||
182 | return $out; |
||
183 | } |
||
184 | function segment_config() { |
||
185 | # Filter $this->close into categories based on the symbol_after_the_dot. |
||
186 | $f = array(); |
||
187 | foreach ($this->close as $c) { |
||
188 | $p = $c->symbol_after_the_dot; |
||
189 | if (!$p) continue; |
||
190 | $f[$p->name][] = $c; |
||
191 | } |
||
192 | return $f; |
||
193 | } |
||
194 | } |
||
195 | class sym { |
||
196 | function __construct($name, $id) { |
||
197 | $this->name=$name; |
||
198 | $this->id=$id; |
||
199 | $this->term = true; # Until proven otherwise. |
||
200 | $this->rule = array(); |
||
201 | $this->config = array(); |
||
202 | $this->lambda = false; |
||
203 | $this->first = new set(); |
||
204 | $this->left_prec = $this->right_prec = 0; |
||
205 | } |
||
206 | function summary() { |
||
207 | $out = ''; |
||
208 | foreach ($this->rule as $rule) $out .= $rule->text()."\n"; |
||
209 | return $out; |
||
210 | } |
||
211 | } |
||
212 | class rule { |
||
213 | function __construct($id, $sym, $rhs, $code, $look, $replace) { |
||
214 | $this->id = $id; |
||
215 | $this->sym = $sym; |
||
216 | $this->rhs = $rhs; |
||
217 | $this->code = $code; |
||
218 | $this->look = $look; |
||
219 | bug_unless(is_int($look)); |
||
220 | $this->replace = $replace; |
||
221 | #$this->prec_sym = $prec_sym; |
||
222 | $this->prec = 0; |
||
223 | $this->first = array(); |
||
224 | $this->epsilon = count($rhs); |
||
225 | } |
||
226 | function lhs_glyph() { return $this->sym->name; } |
||
227 | function determine_precedence() { |
||
228 | # We may eventually expand to allow explicit prec_symbol declarations. |
||
229 | # Until then, we'll go with the rightmost terminal, which is what |
||
230 | # BISON does. People probably expect that. The leftmost terminal |
||
231 | # is a reasonable alternative behaviour, but I don't see the big |
||
232 | # deal just now. |
||
233 | |||
234 | #$prec_sym = $this->prec_sym; |
||
235 | #if (!$prec_sym) |
||
236 | $prec_sym = $this->rightmost_terminal(); |
||
237 | if (!$prec_sym) return; |
||
238 | $this->prec = $prec_sym->left_prec; |
||
239 | } |
||
240 | private function rightmost_terminal() { |
||
241 | $symbol = NULL; |
||
242 | $rhs = $this->rhs; |
||
243 | while ($rhs) { |
||
244 | $symbol = array_pop($rhs); |
||
245 | if ($symbol->term) break; |
||
246 | } |
||
247 | return $symbol; |
||
248 | } |
||
249 | function text() { |
||
250 | $t = "($this->id) ".$this->lhs_glyph().' :='; |
||
251 | foreach($this->rhs as $s) $t .= ' '.$s->name; |
||
252 | return $t; |
||
253 | } |
||
254 | function table(lime_language $lang) { |
||
255 | return array( |
||
256 | 'symbol' => $this->lhs_glyph(), |
||
257 | 'len' => $this->look, |
||
258 | 'replace' => $this->replace, |
||
259 | 'code' => $lang->fixup($this->code), |
||
260 | 'text' => $this->text(), |
||
261 | ); |
||
262 | } |
||
263 | function lambda() { |
||
264 | foreach ($this->rhs as $sym) if (!$sym->lambda) return false; |
||
265 | return true; |
||
266 | } |
||
267 | function find_first() { |
||
268 | $dot = count($this->rhs); |
||
269 | $last = $this->first[$dot] = new set(); |
||
270 | while ($dot) { |
||
271 | $dot--; |
||
272 | $symbol_after_the_dot = $this->rhs[$dot]; |
||
273 | $first = $symbol_after_the_dot->first->all(); |
||
274 | bug_if(empty($first) and !$symbol_after_the_dot->lambda); |
||
275 | $set = new set($first); |
||
276 | if ($symbol_after_the_dot->lambda) { |
||
277 | $set->union($last); |
||
278 | if ($this->epsilon == $dot+1) $this->epsilon = $dot; |
||
279 | } |
||
280 | $last = $this->first[$dot] = $set; |
||
281 | } |
||
282 | } |
||
283 | function teach_symbol_of_first_set() { |
||
284 | $go = false; |
||
285 | foreach ($this->rhs as $sym) { |
||
286 | if ($this->sym->first->union($sym->first)) $go = true; |
||
287 | if (!$sym->lambda) break; |
||
288 | } |
||
289 | return $go; |
||
290 | } |
||
291 | function lambda_from($dot) { |
||
292 | return $this->epsilon <= $dot; |
||
293 | } |
||
294 | function leftmost($follow) { |
||
295 | return new config($this, 0, $follow); |
||
296 | } |
||
297 | function dotted_text($dot) { |
||
298 | $out = $this->lhs_glyph().' :='; |
||
299 | $idx = -1; |
||
300 | foreach($this->rhs as $idx => $s) { |
||
301 | if ($idx == $dot) $out .= ' .'; |
||
302 | $out .= ' '.$s->name; |
||
303 | } |
||
304 | if ($dot > $idx) $out .= ' .'; |
||
305 | return $out; |
||
306 | } |
||
307 | } |
||
308 | class config { |
||
309 | function __construct($rule, $dot, $follow) { |
||
310 | $this->rule=$rule; |
||
311 | $this->dot = $dot; |
||
312 | $this->key = "$rule->id.$dot"; |
||
313 | $this->rightmost = count($rule->rhs) <= $dot; |
||
314 | $this->symbol_after_the_dot = $this->rightmost ? null : $rule->rhs[$dot]; |
||
315 | $this->_blink = array(); |
||
316 | $this->follow = new set($follow); |
||
317 | $this->_flink= array(); |
||
318 | bug_unless($this->rightmost or count($rule)); |
||
319 | } |
||
320 | function text() { |
||
321 | $out = $this->rule->dotted_text($this->dot); |
||
322 | $out .= ' [ '.implode(' ', $this->follow->all()).' ]'; |
||
323 | return $out; |
||
324 | } |
||
325 | function blink($config) { |
||
326 | $this->_blink[] = $config; |
||
327 | } |
||
328 | function next() { |
||
329 | bug_if($this->rightmost); |
||
330 | $c = new config($this->rule, $this->dot+1, array()); |
||
331 | # Anything in the follow set for this config will also be in the next. |
||
332 | # However, we link it backwards because we might wind up selecting a |
||
333 | # pre-existing state, and the housekeeping is easier in the first half |
||
334 | # of the program. We'll fix it before doing the propagation. |
||
335 | $c->blink($this); |
||
336 | return $c; |
||
337 | } |
||
338 | function copy_links_from($that) { |
||
339 | foreach($that->_blink as $c) $this->blink($c); |
||
340 | } |
||
341 | function lambda() { |
||
342 | return $this->rule->lambda_from($this->dot); |
||
343 | } |
||
344 | function simple_follow() { |
||
345 | return $this->rule->first[$this->dot+1]->all(); |
||
346 | } |
||
347 | function epsilon_follows() { |
||
348 | return $this->rule->lambda_from($this->dot+1); |
||
349 | } |
||
350 | function fixlinks() { |
||
351 | foreach ($this->_blink as $that) $that->_flink[] = $this; |
||
352 | $this->blink = array(); |
||
353 | } |
||
354 | function dump() { |
||
355 | echo " * "; |
||
356 | echo $this->key.' : '; |
||
357 | echo $this->rule->dotted_text($this->dot); |
||
358 | echo $this->follow->text(); |
||
359 | foreach ($this->_flink as $c) echo $c->key.' / '; |
||
360 | echo "\n"; |
||
361 | } |
||
362 | } |
||
363 | class lime { |
||
364 | var $parser_class = 'parser'; |
||
365 | function __construct() { |
||
366 | $this->p_next = 1; |
||
367 | $this->sym = array(); |
||
368 | $this->rule = array(); |
||
369 | $this->start_symbol_set = array(); |
||
370 | $this->state = array(); |
||
371 | $this->stop = $this->sym('#'); |
||
372 | #$err = $this->sym('error'); |
||
373 | $err->term = false; |
||
374 | $this->lang = new lime_language_php(); |
||
375 | } |
||
376 | function language() { return $this->lang; } |
||
377 | function build_parser() { |
||
378 | $this->add_start_rule(); |
||
379 | foreach ($this->rule as $r) $r->determine_precedence(); |
||
380 | $this->find_sym_lamdba(); |
||
381 | $this->find_sym_first(); |
||
382 | foreach ($this->rule as $rule) $rule->find_first(); |
||
383 | $initial = $this->find_states(); |
||
384 | $this->fixlinks(); |
||
385 | # $this->dump_configurations(); |
||
386 | $this->find_follow_sets(); |
||
387 | foreach($this->state as $s) $s->find_reductions($this); |
||
388 | $i = $this->resolve_conflicts(); |
||
389 | $a = $this->rule_table(); |
||
390 | $qi = $initial->id; |
||
391 | return $this->lang->ptab_to_class($this->parser_class, compact('a', 'qi', 'i')); |
||
392 | } |
||
393 | function rule_table() { |
||
394 | $s = array(); |
||
395 | foreach ($this->rule as $i => $r) { |
||
396 | $s[$i] = $r->table($this->lang); |
||
397 | } |
||
398 | return $s; |
||
399 | } |
||
400 | function add_rule($symbol, $rhs, $code) { |
||
401 | $this->add_raw_rule($symbol, $rhs, $code, count($rhs), true); |
||
402 | } |
||
403 | function trump_up_bogus_lhs($real) { |
||
404 | return "'$real'".count($this->rule); |
||
405 | } |
||
406 | function add_raw_rule($lhs, $rhs, $code, $look, $replace) { |
||
407 | $sym = $this->sym($lhs); |
||
408 | $sym->term=false; |
||
409 | if (empty($rhs)) $sym->lambda = true; |
||
410 | $rs = array(); |
||
411 | foreach ($rhs as $str) $rs[] = $this->sym($str); |
||
412 | $rid = count($this->rule); |
||
413 | $r = new rule($rid, $sym, $rs, $code, $look, $replace); |
||
414 | $this->rule[$rid] = $r; |
||
415 | $sym->rule[] = $r; |
||
416 | } |
||
417 | function sym($str) { |
||
418 | if (!isset($this->sym[$str])) $this->sym[$str] = new sym($str, count($this->sym)); |
||
419 | return $this->sym[$str]; |
||
420 | } |
||
421 | function summary() { |
||
422 | $out = ''; |
||
423 | foreach ($this->sym as $sym) if (!$sym->term) $out .= $sym->summary(); |
||
424 | return $out; |
||
425 | } |
||
426 | private function find_sym_lamdba() { |
||
427 | do { |
||
428 | $go = false; |
||
429 | foreach ($this->sym as $sym) if (!$sym->lambda) { |
||
430 | foreach ($sym->rule as $rule) if ($rule->lambda()) { |
||
431 | $go = true; |
||
432 | $sym->lambda = true; |
||
433 | } |
||
434 | } |
||
435 | } while ($go); |
||
436 | } |
||
437 | private function teach_terminals_first_set() { |
||
438 | foreach ($this->sym as $sym) if ($sym->term) $sym->first->add($sym->name); |
||
439 | } |
||
440 | private function find_sym_first() { |
||
441 | $this->teach_terminals_first_set(); |
||
442 | do { |
||
443 | $go = false; |
||
444 | foreach ($this->rule as $r) if ($r->teach_symbol_of_first_set()) $go = true; |
||
445 | } while ($go); |
||
446 | } |
||
447 | function add_start_rule() { |
||
448 | $rewrite = new lime_rewrite("'start'"); |
||
449 | $rhs = new lime_rhs(); |
||
450 | $rhs->add(new lime_glyph($this->deduce_start_symbol()->name, NULL)); |
||
451 | #$rhs->add(new lime_glyph($this->stop->name, NULL)); |
||
452 | $rewrite->add_rhs($rhs); |
||
453 | $rewrite->update($this); |
||
454 | } |
||
455 | private function deduce_start_symbol() { |
||
456 | $candidate = current($this->start_symbol_set); |
||
457 | # Did the person try to set a start symbol at all? |
||
458 | if (!$candidate) return $this->first_rule_lhs(); |
||
459 | # Do we actually have such a symbol on the left of a rule? |
||
460 | if ($candidate->terminal) return $this->first_rule_lhs(); |
||
461 | # Ok, it's a decent choice. We need to return the symbol entry. |
||
462 | return $this->sym($candidate); |
||
463 | } |
||
464 | private function first_rule_lhs() { |
||
465 | reset($this->rule); |
||
466 | $r = current($this->rule); |
||
467 | return $r->sym; |
||
468 | } |
||
469 | function find_states() { |
||
470 | /* |
||
471 | Build an initial state. This is a recursive process which digs out |
||
472 | the LR(0) state graph. |
||
473 | */ |
||
474 | $start_glyph = "'start'"; |
||
475 | $sym = $this->sym($start_glyph); |
||
476 | $basis = array(); |
||
477 | foreach($sym->rule as $rule) { |
||
478 | $c = $rule->leftmost(array('#')); |
||
479 | $basis[$c->key] = $c; |
||
480 | } |
||
481 | $initial = $this->get_state($basis); |
||
482 | $initial->add_accept($sym); |
||
483 | return $initial; |
||
484 | } |
||
485 | function get_state($basis) { |
||
486 | $key = array_keys($basis); |
||
487 | sort($key); |
||
488 | $key = implode(' ', $key); |
||
489 | if (isset($this->state[$key])) { |
||
490 | # Copy all the links around... |
||
491 | $state = $this->state[$key]; |
||
492 | foreach($basis as $config) $state->close[$config->key]->copy_links_from($config); |
||
493 | return $state; |
||
494 | } else { |
||
495 | $close = $this->state_closure($basis); |
||
496 | $this->state[$key] = $state = new state(count($this->state), $key, $close); |
||
497 | $this->build_shifts($state); |
||
498 | return $state; |
||
499 | } |
||
500 | } |
||
501 | private function state_closure($q) { |
||
502 | # $q is a list of config. |
||
503 | $close = array(); |
||
504 | while ($config = array_pop($q)) { |
||
505 | if (isset($close[$config->key])) { |
||
506 | $close[$config->key]->copy_links_from($config); |
||
507 | $close[$config->key]->follow->union($config->follow); |
||
508 | continue; |
||
509 | } |
||
510 | $close[$config->key] = $config; |
||
511 | |||
512 | $symbol_after_the_dot = $config->symbol_after_the_dot; |
||
513 | if (!$symbol_after_the_dot) continue; |
||
514 | |||
515 | if (! $symbol_after_the_dot->term) { |
||
516 | foreach ($symbol_after_the_dot->rule as $r) { |
||
517 | $station = $r->leftmost($config->simple_follow()); |
||
518 | if ($config->epsilon_follows()) $station->blink($config); |
||
519 | $q[] = $station; |
||
520 | } |
||
521 | # The following turned out to be wrong. Don't do it. |
||
522 | #if ($symbol_after_the_dot->lambda) { |
||
523 | # $q[] = $config->next(); |
||
524 | #} |
||
525 | } |
||
526 | |||
527 | } |
||
528 | return $close; |
||
529 | } |
||
530 | function build_shifts($state) { |
||
531 | foreach ($state->segment_config() as $glyph => $segment) { |
||
532 | $basis = array(); |
||
533 | foreach ($segment as $preshift) { |
||
534 | $postshift = $preshift->next(); |
||
535 | $basis[$postshift->key] = $postshift; |
||
536 | } |
||
537 | $dest = $this->get_state($basis); |
||
538 | $state->add_shift($this->sym($glyph), $dest); |
||
539 | } |
||
540 | } |
||
541 | function fixlinks() { |
||
542 | foreach ($this->state as $s) foreach ($s->close as $c) $c->fixlinks(); |
||
543 | } |
||
544 | function find_follow_sets() { |
||
545 | $q = array(); |
||
546 | foreach ($this->state as $s) foreach ($s->close as $c) $q[] = $c; |
||
547 | while ($q) { |
||
548 | $c = array_shift($q); |
||
549 | foreach ($c->_flink as $d) { |
||
550 | if ($d->follow->union($c->follow)) $q[] = $d; |
||
551 | } |
||
552 | } |
||
553 | } |
||
554 | private function set_assoc($ss, $l, $r) { |
||
555 | $p = ($this->p_next++)*2; |
||
556 | foreach ($ss as $glyph) { |
||
557 | $s = $this->sym($glyph); |
||
558 | $s->left_prec = $p+$l; |
||
559 | $s->right_prec = $p+$r; |
||
560 | } |
||
561 | } |
||
562 | function left_assoc($ss) { $this->set_assoc($ss, 1, 0); } |
||
563 | function right_assoc($ss) { $this->set_assoc($ss, 0, 1); } |
||
564 | function non_assoc($ss) { $this->set_assoc($ss, 0, 0); } |
||
565 | private function resolve_conflicts() { |
||
566 | # For each state, try to find one and only one |
||
567 | # thing to do for any given lookahead. |
||
568 | $i = array(); |
||
569 | foreach ($this->state as $s) $i[$s->id] = $s->resolve_conflicts(); |
||
570 | return $i; |
||
571 | } |
||
572 | function dump_configurations() { |
||
573 | foreach ($this->state as $q) $q->dump(); |
||
574 | } |
||
575 | function dump_first_sets() { |
||
576 | foreach ($this->sym as $s) { |
||
577 | echo " * "; |
||
578 | echo $s->name.' : '; |
||
579 | echo $s->first->text(); |
||
580 | echo "\n"; |
||
581 | } |
||
582 | } |
||
583 | function add_rule_with_actions($lhs, $rhs) { |
||
584 | # First, make sure this thing is well-formed. |
||
585 | if(!is_object(end($rhs))) $rhs[] = new cf_action(''); |
||
586 | # Now, split it into chunks based on the actions. |
||
587 | $look = -1; |
||
588 | $subrule = array(); |
||
589 | $subsymbol = ''; |
||
590 | while (count($rhs)) { |
||
591 | $it = array_shift($rhs); |
||
592 | $look ++; |
||
593 | if (is_string($it)) { |
||
594 | $subrule[] = $it; |
||
595 | } else { |
||
596 | $code = $it->code; |
||
597 | # It's an action. |
||
598 | # Is it the last one? |
||
599 | if (count($rhs)) { |
||
600 | # no. |
||
601 | $subsymbol = $this->trump_up_bogus_lhs($lhs); |
||
602 | $this->add_raw_rule($subsymbol, $subrule, $code, $look, false); |
||
603 | $subrule = array($subsymbol); |
||
604 | } else { |
||
605 | # yes. |
||
606 | $this->add_raw_rule($lhs, $subrule, $code, $look, true); |
||
607 | } |
||
608 | } |
||
609 | } |
||
610 | } |
||
611 | function pragma($type, $args) { |
||
612 | switch ($type) { |
||
613 | case 'left': |
||
614 | $this->left_assoc($args); |
||
615 | break; |
||
616 | |||
617 | case 'right': |
||
618 | $this->right_assoc($args); |
||
619 | break; |
||
620 | |||
621 | case 'nonassoc': |
||
622 | $this->non_assoc($args); |
||
623 | break; |
||
624 | |||
625 | case 'start': |
||
626 | $this->start_symbol_set = $args; |
||
627 | break; |
||
628 | |||
629 | case 'class': |
||
630 | $this->parser_class = $args[0]; |
||
631 | break; |
||
632 | |||
633 | default: |
||
634 | emit(sprintf("Bad Parser Pragma: (%s)", $type)); |
||
635 | exit(1); |
||
636 | } |
||
637 | } |
||
638 | } |
||
639 | class lime_language {} |
||
640 | class lime_language_php extends lime_language { |
||
641 | private function result_code($expr) { return "\$result = $expr;\n"; } |
||
642 | function default_result() { return $this->result_code('reset($tokens)'); } |
||
643 | function result_pos($pos) { return $this->result_code(lime_token_reference($pos)); } |
||
644 | function bind($name, $pos) { return "\$$name =& \$tokens[$pos];\n"; } |
||
645 | function fixup($code) { |
||
646 | $code = preg_replace_callback('/\\$(\d+)/', 'lime_token_reference_callback', $code); |
||
647 | $code = preg_replace('/\\$\\$/', '$result', $code); |
||
648 | return $code; |
||
649 | } |
||
650 | function to_php($code) { |
||
651 | return $code; |
||
652 | } |
||
653 | function ptab_to_class($parser_class, $ptab) { |
||
654 | $code = "class $parser_class extends lime_parser {\n"; |
||
655 | $code .= 'var $qi = '.var_export($ptab['qi'], true).";\n"; |
||
656 | $code .= 'var $i = '.var_export($ptab['i'], true).";\n"; |
||
657 | |||
658 | |||
659 | $rc = array(); |
||
660 | $method = array(); |
||
661 | $rules = array(); |
||
662 | foreach($ptab['a'] as $k => $a) { |
||
663 | $symbol = preg_replace('/[^\w]/', '', $a['symbol']); |
||
664 | $rn = ++$rc[$symbol]; |
||
665 | $mn = "reduce_${k}_${symbol}_${rn}"; |
||
666 | $method[$k] = $mn; |
||
667 | $comment = "#\n# $a[text]\n#\n"; |
||
668 | $php = $this->to_php($a['code']); |
||
669 | $code .= "function $mn(".LIME_CALL_PROTOCOL.") {\n$comment$php\n}\n\n"; |
||
670 | |||
671 | |||
672 | unset($a['code']); |
||
673 | unset($a['text']); |
||
674 | $rules[$k] = $a; |
||
675 | } |
||
676 | |||
677 | $code .= 'var $method = '.var_export($method, true).";\n"; |
||
678 | $code .= 'var $a = '.var_export($rules, true).";\n"; |
||
679 | |||
680 | |||
681 | |||
682 | $code .= "}\n"; |
||
683 | #echo $code; |
||
684 | return $code; |
||
685 | } |
||
686 | } |
||
687 | class lime_rhs { |
||
688 | function __construct() { |
||
689 | /** |
||
690 | Construct and add glyphs and actions in whatever order. |
||
691 | Then, add this to a lime_rewrite. |
||
692 | |||
693 | Don't call install_rule. |
||
694 | The rewrite will do that for you when you "update" with it. |
||
695 | */ |
||
696 | $this->rhs = array(); |
||
697 | } |
||
698 | function add($slot) { |
||
699 | bug_unless($slot instanceof lime_slot); |
||
700 | $this->rhs[] = $slot; |
||
701 | } |
||
702 | function install_rule(lime $lime, $lhs) { |
||
703 | # This is the part that has to break the rule into subrules if necessary. |
||
704 | $rhs = $this->rhs; |
||
705 | # First, make sure this thing is well-formed. |
||
706 | if (!(end($rhs) instanceof lime_action)) $rhs[] = new lime_action('', NULL); |
||
707 | # Now, split it into chunks based on the actions. |
||
708 | |||
709 | $lang = $lime->language(); |
||
710 | $result_code = $lang->default_result(); |
||
711 | $look = -1; |
||
712 | $subrule = array(); |
||
713 | $subsymbol = ''; |
||
714 | $preamble = ''; |
||
715 | while (count($rhs)) { |
||
716 | $it = array_shift($rhs); |
||
717 | $look ++; |
||
718 | if ($it instanceof lime_glyph) { |
||
719 | $subrule[] = $it->data; |
||
720 | } elseif ($it instanceof lime_action) { |
||
721 | $code = $it->data; |
||
722 | # It's an action. |
||
723 | # Is it the last one? |
||
724 | if (count($rhs)) { |
||
725 | # no. |
||
726 | $subsymbol = $lime->trump_up_bogus_lhs($lhs); |
||
727 | $action = $lang->default_result().$preamble.$code; |
||
728 | $lime->add_raw_rule($subsymbol, $subrule, $action, $look, false); |
||
729 | $subrule = array($subsymbol); |
||
730 | } else { |
||
731 | # yes. |
||
732 | $action = $result_code.$preamble.$code; |
||
733 | $lime->add_raw_rule($lhs, $subrule, $action, $look, true); |
||
734 | } |
||
735 | } else { |
||
736 | impossible(); |
||
737 | } |
||
738 | if ($it->name == '$') $result_code = $lang->result_pos($look); |
||
739 | elseif ($it->name) $preamble .= $lang->bind($it->name, $look); |
||
740 | } |
||
741 | } |
||
742 | } |
||
743 | class lime_rewrite { |
||
744 | function __construct($glyph) { |
||
745 | /** |
||
746 | Construct one of these with the name of the lhs. |
||
747 | Add some rhs-es to it. |
||
748 | Finally, "update" the lime you're building. |
||
749 | */ |
||
750 | $this->glyph = $glyph; |
||
751 | $this->rhs = array(); |
||
752 | } |
||
753 | function add_rhs($rhs) { |
||
754 | bug_unless($rhs instanceof lime_rhs); |
||
755 | $this->rhs[] = $rhs; |
||
756 | } |
||
757 | function update(lime $lime) { |
||
758 | foreach ($this->rhs as $rhs) { |
||
759 | $rhs->install_rule($lime, $this->glyph); |
||
760 | |||
761 | } |
||
762 | } |
||
763 | } |
||
764 | class lime_slot { |
||
765 | /** |
||
766 | This keeps track of one position in an rhs. |
||
767 | We specialize to handle actions and glyphs. |
||
768 | If there is a name for the slot, we store it here. |
||
769 | Later on, this structure will be consulted in the formation of |
||
770 | actual production rules. |
||
771 | */ |
||
772 | function __construct($data, $name) { |
||
773 | $this->data = $data; |
||
774 | $this->name = $name; |
||
775 | } |
||
776 | function preamble($pos) { |
||
777 | if (strlen($this->name) > 0) { |
||
778 | return "\$$this->name =& \$tokens[$pos];\n"; |
||
779 | } |
||
780 | } |
||
781 | } |
||
782 | class lime_glyph extends lime_slot {} |
||
783 | class lime_action extends lime_slot {} |
||
784 | function lime_bootstrap() { |
||
785 | |||
786 | /* |
||
787 | |||
788 | This function isn't too terribly interesting to the casual observer. |
||
789 | You're probably better off looking at parse_lime_grammar() instead. |
||
790 | |||
791 | Ok, if you insist, I'll explain. |
||
792 | |||
793 | The input to Lime is a CFG parser definition. That definition is |
||
794 | written in some language. (The Lime language, to be exact.) |
||
795 | Anyway, I have to parse the Lime language and compile it into a |
||
796 | very complex data structure from which a parser is eventually |
||
797 | built. What better way than to use Lime itself to parse its own |
||
798 | language? Well, it's almost that simple, but not quite. |
||
799 | |||
800 | The Lime language is fairly potent, but a restricted subset of |
||
801 | its features was used to write a metagrammar. Then, I hand-translated |
||
802 | that metagrammar into another form which is easy to snarf up. |
||
803 | In the process of reading that simplified form, this function |
||
804 | builds the same sort of data structure that later gets turned into |
||
805 | a parser. The last step is to run the parser generation algorithm, |
||
806 | eval() the resulting PHP code, and voila! With no hard work, I can |
||
807 | suddenly read and comprehend the full range of the Lime language |
||
808 | without ever having written an algorithm to do so. It feels like magic. |
||
809 | |||
810 | */ |
||
811 | |||
812 | $bootstrap = LIME_DIR."/lime.bootstrap"; |
||
813 | $lime = new lime(); |
||
814 | $lime->parser_class = 'lime_metaparser'; |
||
815 | $rhs = array(); |
||
816 | bug_unless(is_readable($bootstrap)); |
||
817 | foreach(file($bootstrap) as $l) { |
||
818 | $a = explode(":", $l, 2); |
||
819 | if (count($a) == 2) { |
||
820 | list($pattern, $code) = $a; |
||
821 | $sl = new lime_rhs(); |
||
822 | $pattern = trim($pattern); |
||
823 | if (strlen($pattern)>0) { |
||
824 | foreach (explode(' ', $pattern) as $glyph) $sl->add(new lime_glyph($glyph, NULL)); |
||
825 | } |
||
826 | $sl->add(new lime_action($code, NULL)); |
||
827 | $rhs[] = $sl; |
||
828 | } else { |
||
829 | $m = preg_match('/^to (\w+)$/', $l, $r); |
||
830 | if ($m == 0) continue; |
||
831 | $g = $r[1]; |
||
832 | $rw = new lime_rewrite($g); |
||
833 | foreach($rhs as $b) $rw->add_rhs($b); |
||
834 | $rw->update($lime); |
||
835 | $rhs = array(); |
||
836 | } |
||
837 | } |
||
838 | $parser_code = $lime->build_parser(); |
||
839 | eval($parser_code); |
||
840 | } |
||
841 | |||
842 | class voodoo_scanner extends flex_scanner { |
||
843 | /* |
||
844 | |||
845 | The voodoo is in the way I do lexical processing on grammar definition |
||
846 | files. They contain embedded bits of PHP, and it's important to keep |
||
847 | track of things like strings, comments, and matched braces. It seemed |
||
848 | like an ideal problem to solve with GNU flex, so I wrote a little |
||
849 | scanner in flex and C to dig out the tokens for me. Of course, I need |
||
850 | the tokens in PHP, so I designed a simple binary wrapper for them which |
||
851 | also contains line-number information, guaranteed to help out if you |
||
852 | write a grammar which surprises the parser in any manner. |
||
853 | |||
854 | */ |
||
855 | function executable() { return LIME_DIR.'/lime_scan_tokens'; } |
||
856 | } |
||
857 | |||
858 | function parse_lime_grammar($path) { |
||
859 | /* |
||
860 | |||
861 | This is a good function to read because it teaches you how to interface |
||
862 | with a Lime parser. I've tried to isolate out the bits that aren't |
||
863 | instructive in that regard. |
||
864 | |||
865 | */ |
||
866 | if (!class_exists('lime_metaparser')) lime_bootstrap(); |
||
867 | |||
868 | $parse_engine = new parse_engine(new lime_metaparser()); |
||
869 | $scanner = new voodoo_scanner($path); |
||
870 | try { |
||
871 | # The result of parsing a Lime grammar is a Lime object. |
||
872 | $lime = $scanner->feed($parse_engine); |
||
873 | # Calling its build_parser() method gets the output PHP code. |
||
874 | return $lime->build_parser(); |
||
875 | } catch (parse_error $e) { |
||
876 | die ($e->getMessage()." in $path line $scanner->lineno.\n"); |
||
877 | } |
||
878 | } |
||
879 | |||
880 | |||
881 | if ($_SERVER['argv']) { |
||
882 | $code = ''; |
||
883 | array_shift($_SERVER['argv']); # Strip out the program name. |
||
884 | foreach ($_SERVER['argv'] as $path) { |
||
885 | $code .= parse_lime_grammar($path); |
||
886 | } |
||
887 | |||
888 | echo "<?php\n\n"; |
||
889 | ?> |
||
890 | |||
891 | /* |
||
892 | |||
893 | DON'T EDIT THIS FILE! |
||
894 | |||
895 | This file was automatically generated by the Lime parser generator. |
||
896 | The real source code you should be looking at is in one or more |
||
897 | grammar files in the Lime format. |
||
898 | |||
899 | THE ONLY REASON TO LOOK AT THIS FILE is to see where in the grammar |
||
900 | file that your error happened, because there are enough comments to |
||
901 | help you debug your grammar. |
||
902 | |||
903 | If you ignore this warning, you're shooting yourself in the brain, |
||
904 | not the foot. |
||
905 | |||
906 | */ |
||
907 | |||
908 | <?php |
||
909 | echo $code; |
||
910 | } |