BadVPN – Blame information for rev 1

Subversion Repositories:
Rev:
Rev Author Line No. Line
1 office 1 <?php
2 /*
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU Library General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16 */
17  
18 define('LIME_DIR', dirname(__FILE__));
19  
20 function emit($str) { fputs(STDERR, $str."\n"); }
21  
22 class Bug extends Exception {}
23 function bug($gripe='Bug found.') { throw new Bug($gripe); }
24 function bug_if($falacy, $gripe='Bug found.') { if ($falacy) throw new Bug($gripe); }
25 function bug_unless($assertion, $gripe='Bug found.') { if (!$assertion) throw new Bug($gripe); }
26  
27 include_once(LIME_DIR.'/parse_engine.php');
28 include_once(LIME_DIR.'/set.so.php');
29 include_once(LIME_DIR.'/flex_token_stream.php');
30  
31 function lime_token_reference($pos) { return '$tokens['.$pos.']'; }
32 function lime_token_reference_callback($foo) { return lime_token_reference($foo[1]-1); }
33  
34 class cf_action {
35 function __construct($code) { $this->code=$code; }
36 }
37 class step {
38 /*
39 Base class for parse table instructions. The main idea is to make the
40 subclasses responsible for conflict resolution among themselves. It also
41 forms a sort of interface to the parse table.
42 */
43 function __construct($sym) {
44 bug_unless($sym instanceof sym);
45 $this->sym = $sym;
46 }
47 function glyph() { return $this->sym->name; }
48 }
49 class error extends step {
50 function sane() { return false; }
51 function instruction() { bug("This should not happen."); }
52 function decide($that) { return $this; /* An error shall remain one. */ }
53 }
54 class shift extends step {
55 function __construct($sym, $q) {
56 parent::__construct($sym);
57 $this->q = $q;
58 }
59 function sane() { return true; }
60 function instruction() { return "s $this->q"; }
61 function decide($that) {
62 # shift-shift conflicts are impossible.
63 # shift-accept conflicts are a bug.
64 # so we can infer:
65 bug_unless($that instanceof reduce);
66  
67 # That being said, the resolution is a matter of precedence.
68 $shift_prec = $this->sym->right_prec;
69 $reduce_prec = $that->rule->prec;
70  
71 # If we don't have defined precedence levels for both options,
72 # then we default to shifting:
73 if (!($shift_prec and $reduce_prec)) return $this;
74  
75 # Otherwise, use the step with higher precedence.
76 if ($shift_prec > $reduce_prec) return $this;
77 if ($reduce_prec > $shift_prec) return $that;
78  
79 # The "nonassoc" works by giving equal precedence to both options,
80 # which means to put an error instruction in the parse table.
81 return new error($this->sym);
82 }
83 }
84 class reduce extends step {
85 function __construct($sym, $rule) {
86 bug_unless($rule instanceof rule);
87 parent::__construct($sym);
88 $this->rule = $rule;
89 }
90 function sane() { return true; }
91 function instruction() { return 'r '.$this->rule->id; }
92 function decide($that) {
93 # This means that the input grammar has a reduce-reduce conflict.
94 # Such things are considered an error in the input.
95 throw new RRC($this, $that);
96 #exit(1);
97 # BISON would go with the first encountered reduce thus:
98 # return $this;
99 }
100 }
101 class accept extends step {
102 function __construct($sym) { parent::__construct($sym); }
103 function sane() { return true; }
104 function instruction() { return 'a '.$this->sym->name; }
105 }
106 class RRC extends Exception {
107 function __construct($a, $b) {
108 parent::__construct("Reduce-Reduce Conflict");
109 $this->a = $a;
110 $this->b = $b;
111 }
112 function make_noise() {
113 emit(sprintf(
114 "Reduce-Reduce Conflict:\n%s\n%s\nLookahead is (%s)",
115 $this->a->rule->text(),
116 $this->b->rule->text(),
117 $this->a->glyph()
118 ));
119 }
120 }
121 class state {
122 function __construct($id, $key, $close) {
123 $this->id = $id;
124 $this->key = $key;
125 $this->close = $close; # config key -> object
126 ksort($this->close);
127 $this->action = array();
128 }
129 function dump() {
130 echo " * ".$this->id.' / '.$this->key."\n";
131 foreach ($this->close as $config) $config->dump();
132 }
133 function add_shift($sym, $state) {
134 $this->add_instruction(new shift($sym, $state->id));
135 }
136 function add_reduce($sym, $rule) {
137 $this->add_instruction(new reduce($sym, $rule));
138 }
139 function add_accept($sym) {
140 $this->add_instruction(new accept($sym));
141 }
142 function add_instruction($step) {
143 bug_unless($step instanceof step);
144 $this->action[] = $step;
145 }
146 function find_reductions($lime) {
147 # rightmost configurations followset yields reduce.
148 foreach($this->close as $c) {
149 if ($c->rightmost) {
150 foreach ($c->follow->all() as $glyph) $this->add_reduce($lime->sym($glyph), $c->rule);
151 }
152 }
153 }
154 function resolve_conflicts() {
155 # For each possible lookahead, find one (and only one) step to take.
156 $table = array();
157 foreach ($this->action as $step) {
158 $glyph = $step->glyph();
159 if (isset($table[$glyph])) {
160 # There's a conflict. The shifts all came first, which
161 # simplifies the coding for the step->decide() methods.
162 try {
163 $table[$glyph] = $table[$glyph]->decide($step);
164 } catch (RRC $e) {
165 emit("State $this->id:");
166 $e->make_noise();
167 }
168 } else {
169 # This glyph is yet unprocessed, so the step at hand is
170 # our best current guess at what the grammar indicates.
171 $table[$glyph] = $step;
172 }
173 }
174  
175 # Now that we have the correct steps chosen, this routine is oddly
176 # also responsible for turning that table into the form that will
177 # eventually be passed to the parse engine. (So FIXME?)
178 $out = array();
179 foreach ($table as $glyph => $step) {
180 if ($step->sane()) $out[$glyph] = $step->instruction();
181 }
182 return $out;
183 }
184 function segment_config() {
185 # Filter $this->close into categories based on the symbol_after_the_dot.
186 $f = array();
187 foreach ($this->close as $c) {
188 $p = $c->symbol_after_the_dot;
189 if (!$p) continue;
190 $f[$p->name][] = $c;
191 }
192 return $f;
193 }
194 }
195 class sym {
196 function __construct($name, $id) {
197 $this->name=$name;
198 $this->id=$id;
199 $this->term = true; # Until proven otherwise.
200 $this->rule = array();
201 $this->config = array();
202 $this->lambda = false;
203 $this->first = new set();
204 $this->left_prec = $this->right_prec = 0;
205 }
206 function summary() {
207 $out = '';
208 foreach ($this->rule as $rule) $out .= $rule->text()."\n";
209 return $out;
210 }
211 }
212 class rule {
213 function __construct($id, $sym, $rhs, $code, $look, $replace) {
214 $this->id = $id;
215 $this->sym = $sym;
216 $this->rhs = $rhs;
217 $this->code = $code;
218 $this->look = $look;
219 bug_unless(is_int($look));
220 $this->replace = $replace;
221 #$this->prec_sym = $prec_sym;
222 $this->prec = 0;
223 $this->first = array();
224 $this->epsilon = count($rhs);
225 }
226 function lhs_glyph() { return $this->sym->name; }
227 function determine_precedence() {
228 # We may eventually expand to allow explicit prec_symbol declarations.
229 # Until then, we'll go with the rightmost terminal, which is what
230 # BISON does. People probably expect that. The leftmost terminal
231 # is a reasonable alternative behaviour, but I don't see the big
232 # deal just now.
233  
234 #$prec_sym = $this->prec_sym;
235 #if (!$prec_sym)
236 $prec_sym = $this->rightmost_terminal();
237 if (!$prec_sym) return;
238 $this->prec = $prec_sym->left_prec;
239 }
240 private function rightmost_terminal() {
241 $symbol = NULL;
242 $rhs = $this->rhs;
243 while ($rhs) {
244 $symbol = array_pop($rhs);
245 if ($symbol->term) break;
246 }
247 return $symbol;
248 }
249 function text() {
250 $t = "($this->id) ".$this->lhs_glyph().' :=';
251 foreach($this->rhs as $s) $t .= ' '.$s->name;
252 return $t;
253 }
254 function table(lime_language $lang) {
255 return array(
256 'symbol' => $this->lhs_glyph(),
257 'len' => $this->look,
258 'replace' => $this->replace,
259 'code' => $lang->fixup($this->code),
260 'text' => $this->text(),
261 );
262 }
263 function lambda() {
264 foreach ($this->rhs as $sym) if (!$sym->lambda) return false;
265 return true;
266 }
267 function find_first() {
268 $dot = count($this->rhs);
269 $last = $this->first[$dot] = new set();
270 while ($dot) {
271 $dot--;
272 $symbol_after_the_dot = $this->rhs[$dot];
273 $first = $symbol_after_the_dot->first->all();
274 bug_if(empty($first) and !$symbol_after_the_dot->lambda);
275 $set = new set($first);
276 if ($symbol_after_the_dot->lambda) {
277 $set->union($last);
278 if ($this->epsilon == $dot+1) $this->epsilon = $dot;
279 }
280 $last = $this->first[$dot] = $set;
281 }
282 }
283 function teach_symbol_of_first_set() {
284 $go = false;
285 foreach ($this->rhs as $sym) {
286 if ($this->sym->first->union($sym->first)) $go = true;
287 if (!$sym->lambda) break;
288 }
289 return $go;
290 }
291 function lambda_from($dot) {
292 return $this->epsilon <= $dot;
293 }
294 function leftmost($follow) {
295 return new config($this, 0, $follow);
296 }
297 function dotted_text($dot) {
298 $out = $this->lhs_glyph().' :=';
299 $idx = -1;
300 foreach($this->rhs as $idx => $s) {
301 if ($idx == $dot) $out .= ' .';
302 $out .= ' '.$s->name;
303 }
304 if ($dot > $idx) $out .= ' .';
305 return $out;
306 }
307 }
308 class config {
309 function __construct($rule, $dot, $follow) {
310 $this->rule=$rule;
311 $this->dot = $dot;
312 $this->key = "$rule->id.$dot";
313 $this->rightmost = count($rule->rhs) <= $dot;
314 $this->symbol_after_the_dot = $this->rightmost ? null : $rule->rhs[$dot];
315 $this->_blink = array();
316 $this->follow = new set($follow);
317 $this->_flink= array();
318 bug_unless($this->rightmost or count($rule));
319 }
320 function text() {
321 $out = $this->rule->dotted_text($this->dot);
322 $out .= ' [ '.implode(' ', $this->follow->all()).' ]';
323 return $out;
324 }
325 function blink($config) {
326 $this->_blink[] = $config;
327 }
328 function next() {
329 bug_if($this->rightmost);
330 $c = new config($this->rule, $this->dot+1, array());
331 # Anything in the follow set for this config will also be in the next.
332 # However, we link it backwards because we might wind up selecting a
333 # pre-existing state, and the housekeeping is easier in the first half
334 # of the program. We'll fix it before doing the propagation.
335 $c->blink($this);
336 return $c;
337 }
338 function copy_links_from($that) {
339 foreach($that->_blink as $c) $this->blink($c);
340 }
341 function lambda() {
342 return $this->rule->lambda_from($this->dot);
343 }
344 function simple_follow() {
345 return $this->rule->first[$this->dot+1]->all();
346 }
347 function epsilon_follows() {
348 return $this->rule->lambda_from($this->dot+1);
349 }
350 function fixlinks() {
351 foreach ($this->_blink as $that) $that->_flink[] = $this;
352 $this->blink = array();
353 }
354 function dump() {
355 echo " * ";
356 echo $this->key.' : ';
357 echo $this->rule->dotted_text($this->dot);
358 echo $this->follow->text();
359 foreach ($this->_flink as $c) echo $c->key.' / ';
360 echo "\n";
361 }
362 }
363 class lime {
364 var $parser_class = 'parser';
365 function __construct() {
366 $this->p_next = 1;
367 $this->sym = array();
368 $this->rule = array();
369 $this->start_symbol_set = array();
370 $this->state = array();
371 $this->stop = $this->sym('#');
372 #$err = $this->sym('error');
373 $err->term = false;
374 $this->lang = new lime_language_php();
375 }
376 function language() { return $this->lang; }
377 function build_parser() {
378 $this->add_start_rule();
379 foreach ($this->rule as $r) $r->determine_precedence();
380 $this->find_sym_lamdba();
381 $this->find_sym_first();
382 foreach ($this->rule as $rule) $rule->find_first();
383 $initial = $this->find_states();
384 $this->fixlinks();
385 # $this->dump_configurations();
386 $this->find_follow_sets();
387 foreach($this->state as $s) $s->find_reductions($this);
388 $i = $this->resolve_conflicts();
389 $a = $this->rule_table();
390 $qi = $initial->id;
391 return $this->lang->ptab_to_class($this->parser_class, compact('a', 'qi', 'i'));
392 }
393 function rule_table() {
394 $s = array();
395 foreach ($this->rule as $i => $r) {
396 $s[$i] = $r->table($this->lang);
397 }
398 return $s;
399 }
400 function add_rule($symbol, $rhs, $code) {
401 $this->add_raw_rule($symbol, $rhs, $code, count($rhs), true);
402 }
403 function trump_up_bogus_lhs($real) {
404 return "'$real'".count($this->rule);
405 }
406 function add_raw_rule($lhs, $rhs, $code, $look, $replace) {
407 $sym = $this->sym($lhs);
408 $sym->term=false;
409 if (empty($rhs)) $sym->lambda = true;
410 $rs = array();
411 foreach ($rhs as $str) $rs[] = $this->sym($str);
412 $rid = count($this->rule);
413 $r = new rule($rid, $sym, $rs, $code, $look, $replace);
414 $this->rule[$rid] = $r;
415 $sym->rule[] = $r;
416 }
417 function sym($str) {
418 if (!isset($this->sym[$str])) $this->sym[$str] = new sym($str, count($this->sym));
419 return $this->sym[$str];
420 }
421 function summary() {
422 $out = '';
423 foreach ($this->sym as $sym) if (!$sym->term) $out .= $sym->summary();
424 return $out;
425 }
426 private function find_sym_lamdba() {
427 do {
428 $go = false;
429 foreach ($this->sym as $sym) if (!$sym->lambda) {
430 foreach ($sym->rule as $rule) if ($rule->lambda()) {
431 $go = true;
432 $sym->lambda = true;
433 }
434 }
435 } while ($go);
436 }
437 private function teach_terminals_first_set() {
438 foreach ($this->sym as $sym) if ($sym->term) $sym->first->add($sym->name);
439 }
440 private function find_sym_first() {
441 $this->teach_terminals_first_set();
442 do {
443 $go = false;
444 foreach ($this->rule as $r) if ($r->teach_symbol_of_first_set()) $go = true;
445 } while ($go);
446 }
447 function add_start_rule() {
448 $rewrite = new lime_rewrite("'start'");
449 $rhs = new lime_rhs();
450 $rhs->add(new lime_glyph($this->deduce_start_symbol()->name, NULL));
451 #$rhs->add(new lime_glyph($this->stop->name, NULL));
452 $rewrite->add_rhs($rhs);
453 $rewrite->update($this);
454 }
455 private function deduce_start_symbol() {
456 $candidate = current($this->start_symbol_set);
457 # Did the person try to set a start symbol at all?
458 if (!$candidate) return $this->first_rule_lhs();
459 # Do we actually have such a symbol on the left of a rule?
460 if ($candidate->terminal) return $this->first_rule_lhs();
461 # Ok, it's a decent choice. We need to return the symbol entry.
462 return $this->sym($candidate);
463 }
464 private function first_rule_lhs() {
465 reset($this->rule);
466 $r = current($this->rule);
467 return $r->sym;
468 }
469 function find_states() {
470 /*
471 Build an initial state. This is a recursive process which digs out
472 the LR(0) state graph.
473 */
474 $start_glyph = "'start'";
475 $sym = $this->sym($start_glyph);
476 $basis = array();
477 foreach($sym->rule as $rule) {
478 $c = $rule->leftmost(array('#'));
479 $basis[$c->key] = $c;
480 }
481 $initial = $this->get_state($basis);
482 $initial->add_accept($sym);
483 return $initial;
484 }
485 function get_state($basis) {
486 $key = array_keys($basis);
487 sort($key);
488 $key = implode(' ', $key);
489 if (isset($this->state[$key])) {
490 # Copy all the links around...
491 $state = $this->state[$key];
492 foreach($basis as $config) $state->close[$config->key]->copy_links_from($config);
493 return $state;
494 } else {
495 $close = $this->state_closure($basis);
496 $this->state[$key] = $state = new state(count($this->state), $key, $close);
497 $this->build_shifts($state);
498 return $state;
499 }
500 }
501 private function state_closure($q) {
502 # $q is a list of config.
503 $close = array();
504 while ($config = array_pop($q)) {
505 if (isset($close[$config->key])) {
506 $close[$config->key]->copy_links_from($config);
507 $close[$config->key]->follow->union($config->follow);
508 continue;
509 }
510 $close[$config->key] = $config;
511  
512 $symbol_after_the_dot = $config->symbol_after_the_dot;
513 if (!$symbol_after_the_dot) continue;
514  
515 if (! $symbol_after_the_dot->term) {
516 foreach ($symbol_after_the_dot->rule as $r) {
517 $station = $r->leftmost($config->simple_follow());
518 if ($config->epsilon_follows()) $station->blink($config);
519 $q[] = $station;
520 }
521 # The following turned out to be wrong. Don't do it.
522 #if ($symbol_after_the_dot->lambda) {
523 # $q[] = $config->next();
524 #}
525 }
526  
527 }
528 return $close;
529 }
530 function build_shifts($state) {
531 foreach ($state->segment_config() as $glyph => $segment) {
532 $basis = array();
533 foreach ($segment as $preshift) {
534 $postshift = $preshift->next();
535 $basis[$postshift->key] = $postshift;
536 }
537 $dest = $this->get_state($basis);
538 $state->add_shift($this->sym($glyph), $dest);
539 }
540 }
541 function fixlinks() {
542 foreach ($this->state as $s) foreach ($s->close as $c) $c->fixlinks();
543 }
544 function find_follow_sets() {
545 $q = array();
546 foreach ($this->state as $s) foreach ($s->close as $c) $q[] = $c;
547 while ($q) {
548 $c = array_shift($q);
549 foreach ($c->_flink as $d) {
550 if ($d->follow->union($c->follow)) $q[] = $d;
551 }
552 }
553 }
554 private function set_assoc($ss, $l, $r) {
555 $p = ($this->p_next++)*2;
556 foreach ($ss as $glyph) {
557 $s = $this->sym($glyph);
558 $s->left_prec = $p+$l;
559 $s->right_prec = $p+$r;
560 }
561 }
562 function left_assoc($ss) { $this->set_assoc($ss, 1, 0); }
563 function right_assoc($ss) { $this->set_assoc($ss, 0, 1); }
564 function non_assoc($ss) { $this->set_assoc($ss, 0, 0); }
565 private function resolve_conflicts() {
566 # For each state, try to find one and only one
567 # thing to do for any given lookahead.
568 $i = array();
569 foreach ($this->state as $s) $i[$s->id] = $s->resolve_conflicts();
570 return $i;
571 }
572 function dump_configurations() {
573 foreach ($this->state as $q) $q->dump();
574 }
575 function dump_first_sets() {
576 foreach ($this->sym as $s) {
577 echo " * ";
578 echo $s->name.' : ';
579 echo $s->first->text();
580 echo "\n";
581 }
582 }
583 function add_rule_with_actions($lhs, $rhs) {
584 # First, make sure this thing is well-formed.
585 if(!is_object(end($rhs))) $rhs[] = new cf_action('');
586 # Now, split it into chunks based on the actions.
587 $look = -1;
588 $subrule = array();
589 $subsymbol = '';
590 while (count($rhs)) {
591 $it = array_shift($rhs);
592 $look ++;
593 if (is_string($it)) {
594 $subrule[] = $it;
595 } else {
596 $code = $it->code;
597 # It's an action.
598 # Is it the last one?
599 if (count($rhs)) {
600 # no.
601 $subsymbol = $this->trump_up_bogus_lhs($lhs);
602 $this->add_raw_rule($subsymbol, $subrule, $code, $look, false);
603 $subrule = array($subsymbol);
604 } else {
605 # yes.
606 $this->add_raw_rule($lhs, $subrule, $code, $look, true);
607 }
608 }
609 }
610 }
611 function pragma($type, $args) {
612 switch ($type) {
613 case 'left':
614 $this->left_assoc($args);
615 break;
616  
617 case 'right':
618 $this->right_assoc($args);
619 break;
620  
621 case 'nonassoc':
622 $this->non_assoc($args);
623 break;
624  
625 case 'start':
626 $this->start_symbol_set = $args;
627 break;
628  
629 case 'class':
630 $this->parser_class = $args[0];
631 break;
632  
633 default:
634 emit(sprintf("Bad Parser Pragma: (%s)", $type));
635 exit(1);
636 }
637 }
638 }
639 class lime_language {}
640 class lime_language_php extends lime_language {
641 private function result_code($expr) { return "\$result = $expr;\n"; }
642 function default_result() { return $this->result_code('reset($tokens)'); }
643 function result_pos($pos) { return $this->result_code(lime_token_reference($pos)); }
644 function bind($name, $pos) { return "\$$name =& \$tokens[$pos];\n"; }
645 function fixup($code) {
646 $code = preg_replace_callback('/\\$(\d+)/', 'lime_token_reference_callback', $code);
647 $code = preg_replace('/\\$\\$/', '$result', $code);
648 return $code;
649 }
650 function to_php($code) {
651 return $code;
652 }
653 function ptab_to_class($parser_class, $ptab) {
654 $code = "class $parser_class extends lime_parser {\n";
655 $code .= 'var $qi = '.var_export($ptab['qi'], true).";\n";
656 $code .= 'var $i = '.var_export($ptab['i'], true).";\n";
657  
658  
659 $rc = array();
660 $method = array();
661 $rules = array();
662 foreach($ptab['a'] as $k => $a) {
663 $symbol = preg_replace('/[^\w]/', '', $a['symbol']);
664 $rn = ++$rc[$symbol];
665 $mn = "reduce_${k}_${symbol}_${rn}";
666 $method[$k] = $mn;
667 $comment = "#\n# $a[text]\n#\n";
668 $php = $this->to_php($a['code']);
669 $code .= "function $mn(".LIME_CALL_PROTOCOL.") {\n$comment$php\n}\n\n";
670  
671  
672 unset($a['code']);
673 unset($a['text']);
674 $rules[$k] = $a;
675 }
676  
677 $code .= 'var $method = '.var_export($method, true).";\n";
678 $code .= 'var $a = '.var_export($rules, true).";\n";
679  
680  
681  
682 $code .= "}\n";
683 #echo $code;
684 return $code;
685 }
686 }
687 class lime_rhs {
688 function __construct() {
689 /**
690 Construct and add glyphs and actions in whatever order.
691 Then, add this to a lime_rewrite.
692  
693 Don't call install_rule.
694 The rewrite will do that for you when you "update" with it.
695 */
696 $this->rhs = array();
697 }
698 function add($slot) {
699 bug_unless($slot instanceof lime_slot);
700 $this->rhs[] = $slot;
701 }
702 function install_rule(lime $lime, $lhs) {
703 # This is the part that has to break the rule into subrules if necessary.
704 $rhs = $this->rhs;
705 # First, make sure this thing is well-formed.
706 if (!(end($rhs) instanceof lime_action)) $rhs[] = new lime_action('', NULL);
707 # Now, split it into chunks based on the actions.
708  
709 $lang = $lime->language();
710 $result_code = $lang->default_result();
711 $look = -1;
712 $subrule = array();
713 $subsymbol = '';
714 $preamble = '';
715 while (count($rhs)) {
716 $it = array_shift($rhs);
717 $look ++;
718 if ($it instanceof lime_glyph) {
719 $subrule[] = $it->data;
720 } elseif ($it instanceof lime_action) {
721 $code = $it->data;
722 # It's an action.
723 # Is it the last one?
724 if (count($rhs)) {
725 # no.
726 $subsymbol = $lime->trump_up_bogus_lhs($lhs);
727 $action = $lang->default_result().$preamble.$code;
728 $lime->add_raw_rule($subsymbol, $subrule, $action, $look, false);
729 $subrule = array($subsymbol);
730 } else {
731 # yes.
732 $action = $result_code.$preamble.$code;
733 $lime->add_raw_rule($lhs, $subrule, $action, $look, true);
734 }
735 } else {
736 impossible();
737 }
738 if ($it->name == '$') $result_code = $lang->result_pos($look);
739 elseif ($it->name) $preamble .= $lang->bind($it->name, $look);
740 }
741 }
742 }
743 class lime_rewrite {
744 function __construct($glyph) {
745 /**
746 Construct one of these with the name of the lhs.
747 Add some rhs-es to it.
748 Finally, "update" the lime you're building.
749 */
750 $this->glyph = $glyph;
751 $this->rhs = array();
752 }
753 function add_rhs($rhs) {
754 bug_unless($rhs instanceof lime_rhs);
755 $this->rhs[] = $rhs;
756 }
757 function update(lime $lime) {
758 foreach ($this->rhs as $rhs) {
759 $rhs->install_rule($lime, $this->glyph);
760  
761 }
762 }
763 }
764 class lime_slot {
765 /**
766 This keeps track of one position in an rhs.
767 We specialize to handle actions and glyphs.
768 If there is a name for the slot, we store it here.
769 Later on, this structure will be consulted in the formation of
770 actual production rules.
771 */
772 function __construct($data, $name) {
773 $this->data = $data;
774 $this->name = $name;
775 }
776 function preamble($pos) {
777 if (strlen($this->name) > 0) {
778 return "\$$this->name =& \$tokens[$pos];\n";
779 }
780 }
781 }
782 class lime_glyph extends lime_slot {}
783 class lime_action extends lime_slot {}
784 function lime_bootstrap() {
785  
786 /*
787  
788 This function isn't too terribly interesting to the casual observer.
789 You're probably better off looking at parse_lime_grammar() instead.
790  
791 Ok, if you insist, I'll explain.
792  
793 The input to Lime is a CFG parser definition. That definition is
794 written in some language. (The Lime language, to be exact.)
795 Anyway, I have to parse the Lime language and compile it into a
796 very complex data structure from which a parser is eventually
797 built. What better way than to use Lime itself to parse its own
798 language? Well, it's almost that simple, but not quite.
799  
800 The Lime language is fairly potent, but a restricted subset of
801 its features was used to write a metagrammar. Then, I hand-translated
802 that metagrammar into another form which is easy to snarf up.
803 In the process of reading that simplified form, this function
804 builds the same sort of data structure that later gets turned into
805 a parser. The last step is to run the parser generation algorithm,
806 eval() the resulting PHP code, and voila! With no hard work, I can
807 suddenly read and comprehend the full range of the Lime language
808 without ever having written an algorithm to do so. It feels like magic.
809  
810 */
811  
812 $bootstrap = LIME_DIR."/lime.bootstrap";
813 $lime = new lime();
814 $lime->parser_class = 'lime_metaparser';
815 $rhs = array();
816 bug_unless(is_readable($bootstrap));
817 foreach(file($bootstrap) as $l) {
818 $a = explode(":", $l, 2);
819 if (count($a) == 2) {
820 list($pattern, $code) = $a;
821 $sl = new lime_rhs();
822 $pattern = trim($pattern);
823 if (strlen($pattern)>0) {
824 foreach (explode(' ', $pattern) as $glyph) $sl->add(new lime_glyph($glyph, NULL));
825 }
826 $sl->add(new lime_action($code, NULL));
827 $rhs[] = $sl;
828 } else {
829 $m = preg_match('/^to (\w+)$/', $l, $r);
830 if ($m == 0) continue;
831 $g = $r[1];
832 $rw = new lime_rewrite($g);
833 foreach($rhs as $b) $rw->add_rhs($b);
834 $rw->update($lime);
835 $rhs = array();
836 }
837 }
838 $parser_code = $lime->build_parser();
839 eval($parser_code);
840 }
841  
842 class voodoo_scanner extends flex_scanner {
843 /*
844  
845 The voodoo is in the way I do lexical processing on grammar definition
846 files. They contain embedded bits of PHP, and it's important to keep
847 track of things like strings, comments, and matched braces. It seemed
848 like an ideal problem to solve with GNU flex, so I wrote a little
849 scanner in flex and C to dig out the tokens for me. Of course, I need
850 the tokens in PHP, so I designed a simple binary wrapper for them which
851 also contains line-number information, guaranteed to help out if you
852 write a grammar which surprises the parser in any manner.
853  
854 */
855 function executable() { return LIME_DIR.'/lime_scan_tokens'; }
856 }
857  
858 function parse_lime_grammar($path) {
859 /*
860  
861 This is a good function to read because it teaches you how to interface
862 with a Lime parser. I've tried to isolate out the bits that aren't
863 instructive in that regard.
864  
865 */
866 if (!class_exists('lime_metaparser')) lime_bootstrap();
867  
868 $parse_engine = new parse_engine(new lime_metaparser());
869 $scanner = new voodoo_scanner($path);
870 try {
871 # The result of parsing a Lime grammar is a Lime object.
872 $lime = $scanner->feed($parse_engine);
873 # Calling its build_parser() method gets the output PHP code.
874 return $lime->build_parser();
875 } catch (parse_error $e) {
876 die ($e->getMessage()." in $path line $scanner->lineno.\n");
877 }
878 }
879  
880  
881 if ($_SERVER['argv']) {
882 $code = '';
883 array_shift($_SERVER['argv']); # Strip out the program name.
884 foreach ($_SERVER['argv'] as $path) {
885 $code .= parse_lime_grammar($path);
886 }
887  
888 echo "<?php\n\n";
889 ?>
890  
891 /*
892  
893 DON'T EDIT THIS FILE!
894  
895 This file was automatically generated by the Lime parser generator.
896 The real source code you should be looking at is in one or more
897 grammar files in the Lime format.
898  
899 THE ONLY REASON TO LOOK AT THIS FILE is to see where in the grammar
900 file that your error happened, because there are enough comments to
901 help you debug your grammar.
902  
903 If you ignore this warning, you're shooting yourself in the brain,
904 not the foot.
905  
906 */
907  
908 <?php
909 echo $code;
910 }