# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2015,
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style." Modify the code at your own risk!
# ----------------------------------------------------------------------------
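
# A minimal usage sketch (illustrative only, not part of this module): grammar
# rules are ordinary functions whose docstrings carry the BNF, and yacc.yacc()
# builds the tables from them. Token names such as PLUS are assumed to come
# from a companion lex specification (the calclex module below is hypothetical):
#
#     import ply.yacc as yacc
#     from calclex import tokens   # hypothetical lexer module
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#
#     parser = yacc.yacc()
#     result = parser.parse('1 + 2')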
61 | |||
62 | import re |
||
63 | import types |
||
64 | import sys |
||
65 | import os.path |
||
66 | import inspect |
||
67 | import base64 |
||
68 | import warnings |
||
69 | |||
70 | __version__ = '3.8' |
||
71 | __tabversion__ = '3.8' |
||
72 | |||
#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = True             # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = False            # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass
137 | |||
138 | # Format the result message that the parser produces when running in debug mode. |
||
139 | def format_result(r): |
||
140 | repr_str = repr(r) |
||
141 | if '\n' in repr_str: |
||
142 | repr_str = repr(repr_str) |
||
143 | if len(repr_str) > resultlimit: |
||
144 | repr_str = repr_str[:resultlimit] + ' ...' |
||
145 | result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) |
||
146 | return result |
||
147 | |||
148 | # Format stack entries when the parser is running in debug mode |
||
149 | def format_stack_entry(r): |
||
150 | repr_str = repr(r) |
||
151 | if '\n' in repr_str: |
||
152 | repr_str = repr(repr_str) |
||
153 | if len(repr_str) < 16: |
||
154 | return repr_str |
||
155 | else: |
||
156 | return '<%s @ 0x%x>' % (type(r).__name__, id(r)) |
||
157 | |||
158 | # Panic mode error recovery support. This feature is being reworked--much of the |
||
159 | # code here is to offer a deprecation/backwards compatible transition |
||
160 | |||
161 | _errok = None |
||
162 | _token = None |
||
163 | _restart = None |
||
164 | _warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error(). |
||
165 | Instead, invoke the methods on the associated parser instance: |
||
166 | |||
167 | def p_error(p): |
||
168 | ... |
||
169 | # Use parser.errok(), parser.token(), parser.restart() |
||
170 | ... |
||
171 | |||
172 | parser = yacc.yacc() |
||
173 | ''' |
||
174 | |||
175 | def errok(): |
||
176 | warnings.warn(_warnmsg) |
||
177 | return _errok() |
||
178 | |||
179 | def restart(): |
||
180 | warnings.warn(_warnmsg) |
||
181 | return _restart() |
||
182 | |||
183 | def token(): |
||
184 | warnings.warn(_warnmsg) |
||
185 | return _token() |
||
186 | |||
187 | # Utility function to call the p_error() function with some deprecation hacks |
||
188 | def call_errorfunc(errorfunc, token, parser): |
||
189 | global _errok, _token, _restart |
||
190 | _errok = parser.errok |
||
191 | _token = parser.token |
||
192 | _restart = parser.restart |
||
193 | r = errorfunc(token) |
||
194 | try: |
||
195 | del _errok, _token, _restart |
||
196 | except NameError: |
||
197 | pass |
||
198 | return r |
||
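
# A sketch of a p_error() handler in the recommended per-instance style
# (SEMI is a hypothetical synchronization token; illustrative only):
#
#     def p_error(tok):
#         if not tok:
#             print('Unexpected end of input')
#             return
#         # Discard tokens until the next ';', then resume normal parsing
#         while True:
#             nxt = parser.token()
#             if not nxt or nxt.type == 'SEMI':
#                 break
#         parser.errok()
#         return nxt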
199 | |||
200 | #----------------------------------------------------------------------------- |
||
201 | # === LR Parsing Engine === |
||
202 | # |
||
203 | # The following classes are used for the LR parser itself. These are not |
||
204 | # used during table construction and are independent of the actual LR |
||
205 | # table generation algorithm |
||
206 | #----------------------------------------------------------------------------- |
||
207 | |||
208 | # This class is used to hold non-terminal grammar symbols during parsing. |
||
209 | # It normally has the following attributes set: |
||
210 | # .type = Grammar symbol type |
||
211 | # .value = Symbol value |
||
212 | # .lineno = Starting line number |
||
213 | # .endlineno = Ending line number (optional, set automatically) |
||
214 | # .lexpos = Starting lex position |
||
215 | # .endlexpos = Ending lex position (optional, set automatically) |
||
216 | |||
class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)
223 | |||
224 | # This class is a wrapper around the objects actually passed to each |
||
225 | # grammar rule. Index lookup and assignment actually assign the |
||
226 | # .value attribute of the underlying YaccSymbol object. |
||
227 | # The lineno() method returns the line number of a given |
||
228 | # item (or 0 if not defined). The linespan() method returns |
||
229 | # a tuple of (startline,endline) representing the range of lines |
||
230 | # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) |
||
231 | # representing the range of positional information for a symbol. |
||
232 | |||
class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos

    def error(self):
        raise SyntaxError
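
# Inside a grammar rule the YaccProduction instance is the argument p: p[0] is
# the left-hand side to assign, p[1]..p[n] are the right-hand symbols, and the
# span methods expose position data (end positions are filled in when parse()
# runs with tracking=True).  A sketch with hypothetical rule and token names:
#
#     def p_assign(p):
#         'assign : NAME EQUALS expr'
#         p[0] = ('assign', p[1], p[3])
#         start, end = p.linespan(3)   # line range covered by the expr symbol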
278 | |||
279 | # ----------------------------------------------------------------------------- |
||
280 | # == LRParser == |
||
281 | # |
||
282 | # The LR Parsing engine. |
||
283 | # ----------------------------------------------------------------------------- |
||
284 | |||
285 | class LRParser: |
||
286 | def __init__(self, lrtab, errorf): |
||
287 | self.productions = lrtab.lr_productions |
||
288 | self.action = lrtab.lr_action |
||
289 | self.goto = lrtab.lr_goto |
||
290 | self.errorfunc = errorf |
||
291 | self.set_defaulted_states() |
||
292 | self.errorok = True |
||
293 | |||
294 | def errok(self): |
||
295 | self.errorok = True |
||
296 | |||
297 | def restart(self): |
||
298 | del self.statestack[:] |
||
299 | del self.symstack[:] |
||
300 | sym = YaccSymbol() |
||
301 | sym.type = '$end' |
||
302 | self.symstack.append(sym) |
||
303 | self.statestack.append(0) |
||
304 | |||
305 | # Defaulted state support. |
||
306 | # This method identifies parser states where there is only one possible reduction action. |
||
307 | # For such states, the parser can make a choose to make a rule reduction without consuming |
||
308 | # the next look-ahead token. This delayed invocation of the tokenizer can be useful in |
||
309 | # certain kinds of advanced parsing situations where the lexer and parser interact with |
||
310 | # each other or change states (i.e., manipulation of scope, lexer states, etc.). |
||
311 | # |
||
312 | # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions |
||
313 | def set_defaulted_states(self): |
||
314 | self.defaulted_states = {} |
||
315 | for state, actions in self.action.items(): |
||
316 | rules = list(actions.values()) |
||
317 | if len(rules) == 1 and rules[0] < 0: |
||
318 | self.defaulted_states[state] = rules[0] |
||
319 | |||
320 | def disable_defaulted_states(self): |
||
321 | self.defaulted_states = {} |
||
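
    # If the lexer or a tokenfunc consults parser state between tokens,
    # defaulted reductions can fire before the next token is fetched; a
    # hedged usage sketch (my_token_source is hypothetical):
    #
    #     parser = yacc.yacc()
    #     parser.disable_defaulted_states()   # always consult the lookahead
    #     result = parser.parse(data, tokenfunc=my_token_source)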
322 | |||
323 | def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None): |
||
324 | if debug or yaccdevel: |
||
325 | if isinstance(debug, int): |
||
326 | debug = PlyLogger(sys.stderr) |
||
327 | return self.parsedebug(input, lexer, debug, tracking, tokenfunc) |
||
328 | elif tracking: |
||
329 | return self.parseopt(input, lexer, debug, tracking, tokenfunc) |
||
330 | else: |
||
331 | return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc) |
||
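
    # parse() picks the engine variant from its flags, so callers only ever
    # invoke parse() itself.  Illustrative calls:
    #
    #     result = parser.parse(data)                 # fastest path, no tracking
    #     result = parser.parse(data, tracking=True)  # keep line/position spans
    #     result = parser.parse(data, debug=log)      # verbose trace via a logger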
332 | |||
333 | |||
334 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
||
335 | # parsedebug(). |
||
336 | # |
||
337 | # This is the debugging enabled version of parse(). All changes made to the |
||
338 | # parsing engine should be made here. Optimized versions of this function |
||
339 | # are automatically created by the ply/ygen.py script. This script cuts out |
||
340 | # sections enclosed in markers such as this: |
||
341 | # |
||
342 | # #--! DEBUG |
||
343 | # statements |
||
344 | # #--! DEBUG |
||
345 | # |
||
346 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
||
347 | |||
    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parsedebug-end
680 | |||
681 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
||
682 | # parseopt(). |
||
683 | # |
||
684 | # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY! |
||
685 | # This code is automatically generated by the ply/ygen.py script. Make |
||
686 | # changes to the parsedebug() method instead. |
||
687 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
||
688 | |||
    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-end
980 | |||
981 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
||
982 | # parseopt_notrack(). |
||
983 | # |
||
984 | # Optimized version of parseopt() with line number tracking removed. |
||
985 | # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated |
||
986 | # by the ply/ygen.py script. Make changes to the parsedebug() method instead. |
||
987 | # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
||
988 | |||
    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call the
                # user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s\n' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-notrack-end
1256 | |||
1257 | # ----------------------------------------------------------------------------- |
||
1258 | # === Grammar Representation === |
||
1259 | # |
||
1260 | # The following functions, classes, and variables are used to represent and |
||
1261 | # manipulate the rules that make up a grammar. |
||
1262 | # ----------------------------------------------------------------------------- |
||
1263 | |||
1264 | # regex matching identifiers |
||
1265 | _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') |
||
1266 | |||
1267 | # ----------------------------------------------------------------------------- |
||
1268 | # class Production: |
||
1269 | # |
||
1270 | # This class stores the raw information about a single production or grammar rule. |
||
1271 | # A grammar rule refers to a specification such as this: |
||
1272 | # |
||
1273 | # expr : expr PLUS term |
||
1274 | # |
||
1275 | # Here are the basic attributes defined on all productions |
||
1276 | # |
||
1277 | # name - Name of the production. For example 'expr' |
||
1278 | # prod - A list of symbols on the right side ['expr','PLUS','term'] |
||
1279 | # prec - Production precedence level |
||
1280 | # number - Production number. |
||
1281 | # func - Function that executes on reduce |
||
1282 | # file - File where production function is defined |
||
1283 | # lineno - Line number where production function is defined |
||
1284 | # |
||
1285 | # The following attributes are defined or optional. |
||
1286 | # |
||
1287 | # len - Length of the production (number of symbols on right hand side) |
||
1288 | # usyms - Set of unique symbols found in the production |
||
1289 | # ----------------------------------------------------------------------------- |
||
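
# As an illustration, for the rule 'expr : expr PLUS term' above the resulting
# object has name='expr', prod=('expr', 'PLUS', 'term'), len=3, and
# usyms=['expr', 'PLUS', 'term'] (each symbol recorded once).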
1290 | |||
1291 | class Production(object): |
||
1292 | reduced = 0 |
||
1293 | def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): |
||
1294 | self.name = name |
||
1295 | self.prod = tuple(prod) |
||
1296 | self.number = number |
||
1297 | self.func = func |
||
1298 | self.callable = None |
||
1299 | self.file = file |
||
1300 | self.line = line |
||
1301 | self.prec = precedence |
||
1302 | |||
1303 | # Internal settings used during table construction |
||
1304 | |||
1305 | self.len = len(self.prod) # Length of the production |
||
1306 | |||
1307 | # Create a list of unique production symbols used in the production |
||
1308 | self.usyms = [] |
||
1309 | for s in self.prod: |
||
1310 | if s not in self.usyms: |
||
1311 | self.usyms.append(s) |
||
1312 | |||
1313 | # List of all LR items for the production |
||
1314 | self.lr_items = [] |
||
1315 | self.lr_next = None |
||
1316 | |||
1317 | # Create a string representation |
||
1318 | if self.prod: |
||
1319 | self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) |
||
1320 | else: |
||
1321 | self.str = '%s -> <empty>' % self.name |
||
1322 | |||
1323 | def __str__(self): |
||
1324 | return self.str |
||
1325 | |||
1326 | def __repr__(self): |
||
1327 | return 'Production(' + str(self) + ')' |
||
1328 | |||
1329 | def __len__(self): |
||
1330 | return len(self.prod) |
||
1331 | |||
1332 | def __nonzero__(self): |
||
1333 | return 1 |
||
1334 | |||
1335 | def __getitem__(self, index): |
||
1336 | return self.prod[index] |
||
1337 | |||
1338 | # Return the nth lr_item from the production (or None if at the end) |
||
1339 | def lr_item(self, n): |
||
1340 | if n > len(self.prod): |
||
1341 | return None |
||
1342 | p = LRItem(self, n) |
||
1343 | # Precompute the list of productions immediately following. |
||
1344 | try: |
||
1345 | p.lr_after = Prodnames[p.prod[n+1]] |
||
1346 | except (IndexError, KeyError): |
||
1347 | p.lr_after = [] |
||
1348 | try: |
||
1349 | p.lr_before = p.prod[n-1] |
||
1350 | except IndexError: |
||
1351 | p.lr_before = None |
||
1352 | return p |
||
1353 | |||
1354 | # Bind the production function name to a callable |
||
1355 | def bind(self, pdict): |
||
1356 | if self.func: |
||
1357 | self.callable = pdict[self.func] |
||
1358 | |||
# This class serves as a minimal standin for Production objects when
# reading table data from files.  It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]

1384 | |||
1385 | # ----------------------------------------------------------------------------- |
||
1386 | # class LRItem |
||
1387 | # |
||
1388 | # This class represents a specific stage of parsing a production rule. For |
||
1389 | # example: |
||
1390 | # |
||
1391 | # expr : expr . PLUS term |
||
1392 | # |
||
1393 | # In the above, the "." represents the current location of the parse. Here |
||
1394 | # basic attributes: |
||
1395 | # |
||
1396 | # name - Name of the production. For example 'expr' |
||
1397 | # prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] |
||
1398 | # number - Production number. |
||
1399 | # |
||
1400 | # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' |
||
1401 | # then lr_next refers to 'expr -> expr PLUS . term' |
||
1402 | # lr_index - LR item index (location of the ".") in the prod list. |
||
1403 | # lookaheads - LALR lookahead symbols for this item |
||
1404 | # len - Length of the production (number of symbols on right hand side) |
||
1405 | # lr_after - List of all productions that immediately follow |
||
1406 | # lr_before - Grammar symbol immediately before |
||
1407 | # ----------------------------------------------------------------------------- |
||
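# For instance, the successive LR items for 'expr : expr PLUS term' print as
# 'expr -> . expr PLUS term', 'expr -> expr . PLUS term',
# 'expr -> expr PLUS . term', and 'expr -> expr PLUS term .' as the dot
# advances across the right-hand side.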
1408 | |||
1409 | class LRItem(object): |
||
1410 | def __init__(self, p, n): |
||
1411 | self.name = p.name |
||
1412 | self.prod = list(p.prod) |
||
1413 | self.number = p.number |
||
1414 | self.lr_index = n |
||
1415 | self.lookaheads = {} |
||
1416 | self.prod.insert(n, '.') |
||
1417 | self.prod = tuple(self.prod) |
||
1418 | self.len = len(self.prod) |
||
1419 | self.usyms = p.usyms |
||
1420 | |||
1421 | def __str__(self): |
||
1422 | if self.prod: |
||
1423 | s = '%s -> %s' % (self.name, ' '.join(self.prod)) |
||
1424 | else: |
||
1425 | s = '%s -> <empty>' % self.name |
||
1426 | return s |
||
1427 | |||
1428 | def __repr__(self): |
||
1429 | return 'LRItem(' + str(self) + ')' |
||
1430 | |||
1431 | # ----------------------------------------------------------------------------- |
||
1432 | # rightmost_terminal() |
||
1433 | # |
||
1434 | # Return the rightmost terminal from a list of symbols. Used in add_production() |
||
1435 | # ----------------------------------------------------------------------------- |
||
1436 | def rightmost_terminal(symbols, terminals): |
||
1437 | i = len(symbols) - 1 |
||
1438 | while i >= 0: |
||
1439 | if symbols[i] in terminals: |
||
1440 | return symbols[i] |
||
1441 | i -= 1 |
||
1442 | return None |
||
1443 | |||
# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]  # A list of all of the productions.  The first
                                   # entry is always reserved for the purpose of
                                   # building an augmented grammar

        self.Prodnames = {}        # A dictionary mapping the names of nonterminals to a list of all
                                   # productions of that nonterminal.

        self.Prodmap = {}          # A dictionary that is only used to detect duplicate
                                   # productions.

        self.Terminals = {}        # A dictionary mapping the names of terminal symbols to a
                                   # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}     # A dictionary mapping names of nonterminals to a list
                                   # of rule numbers where they are used.

        self.First = {}            # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}           # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}       # Precedence rules for each terminal.  Contains tuples of the
                                   # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set()  # Precedence rules that were actually used by the grammar.
                                     # This is only used to provide error checking and to generate
                                     # a warning about unused precedence rules.

        self.Start = None          # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left', 'right', or 'nonassoc'.  level is a numeric level.
    #
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)

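    # Illustrative sketch (not part of PLY's API): how a user-level precedence
    # table such as (('left', 'PLUS'), ('left', 'TIMES')) is normally
    # registered, with a higher level binding tighter:
    #
    #     g = Grammar(['PLUS', 'TIMES', 'NUMBER'])
    #     g.set_precedence('PLUS', 'left', 1)
    #     g.set_precedence('TIMES', 'left', 2)
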
    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.  For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the rightmost terminal symbol
    # or by the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add the production to the list of productions for this nonterminal
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]

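    # Illustrative sketch (not part of PLY's API): a rule written as
    # 'expr : MINUS expr %prec UMINUS' reaches add_production() as
    # syms=['MINUS', 'expr', '%prec', 'UMINUS'].  The two trailing entries are
    # stripped and the rule takes the precedence of UMINUS rather than that of
    # its rightmost terminal, MINUS:
    #
    #     g = Grammar(['MINUS', 'NUMBER'])
    #     g.set_precedence('MINUS', 'left', 1)
    #     g.set_precedence('UMINUS', 'right', 2)
    #     g.add_production('expr', ['MINUS', 'expr', '%prec', 'UMINUS'])
    #     g.Productions[1].prec      # -> ('right', 2)
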
    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates,
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite

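    # Illustrative sketch (not part of PLY's API): a nonterminal with no
    # non-recursive alternative can never derive a string of only terminals,
    # so it never becomes True in the propagation above:
    #
    #     g = Grammar(['PLUS'])
    #     g.add_production('a', ['a', 'PLUS', 'a'])   # only a recursive rule
    #     g.infinite_cycles()                         # -> ['a']
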
    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p:
                continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    result.append((s, p))
        return result

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.  Returns
    # a list of all such symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable).
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term, precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname, self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first(), the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self, beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = False

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = True
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while True:
            some_change = False
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = True
            if not some_change:
                break

        return self.First

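    # Illustrative sketch (not part of PLY's API): for a toy grammar
    #
    #     expr : expr PLUS term        term : NUMBER
    #          | term
    #
    # the fixpoint above yields FIRST(term) = ['NUMBER'], and because every
    # derivation of expr ultimately begins with a term, FIRST(expr) = ['NUMBER'].
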
    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = ['$end']

        while True:
            didadd = False
            for p in self.Productions[1:]:
                # Here is the production set
                for i, B in enumerate(p.prod):
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = False
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
                            if f == '<empty>':
                                hasempty = True
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of Follow(p.name) to Follow(B)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = True
            if not didadd:
                break
        return self.Follow


    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #   E -> E PLUS E
    #
    # creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while True:
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items

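# Illustrative sketch (not part of PLY's API): the list of LR items that
# build_lritems() attaches to a single production.  The grammar below is
# hypothetical.
def _build_lritems_example():
    g = Grammar(['PLUS'])
    g.add_production('E', ['E', 'PLUS', 'E'])
    g.set_start('E')
    g.build_lritems()
    return [str(item) for item in g.Productions[1].lr_items]
    # -> ['E -> . E PLUS E', 'E -> E . PLUS E',
    #     'E -> E PLUS . E', 'E -> E PLUS E .']
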
# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here.  They are defined
# in the derived class LRGeneratedTable.
# -----------------------------------------------------------------------------

class VersionError(YaccError):
    pass

class LRTable(object):
    def __init__(self):
        self.lr_action = None
        self.lr_goto = None
        self.lr_productions = None
        self.lr_method = None

    def read_table(self, module):
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            exec('import %s' % module)
            parsetab = sys.modules[module]

        if parsetab._tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self, filename):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        if not os.path.exists(filename):
            raise ImportError

        in_f = open(filename, 'rb')

        tabversion = pickle.load(in_f)
        if tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')
        self.lr_method = pickle.load(in_f)
        signature = pickle.load(in_f)
        self.lr_action = pickle.load(in_f)
        self.lr_goto = pickle.load(in_f)
        productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        in_f.close()
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)


# -----------------------------------------------------------------------------
#                           === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X, R, FP):
    N = {}
    for x in X:
        N[x] = 0
    stack = []
    F = {}
    for x in X:
        if N[x] == 0:
            traverse(x, N, stack, F, X, R, FP)
    return F

def traverse(x, N, stack, F, X, R, FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()

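# Illustrative sketch (not part of PLY's API): a tiny fixpoint computed with
# digraph().  The relation forms the cycle a -> b -> c -> a, so every member
# of the strongly connected component receives the union of the three seed
# sets.  All names here are hypothetical.
def _digraph_example():
    X = ['a', 'b', 'c']
    edges = {'a': ['b'], 'b': ['c'], 'c': ['a']}
    R = lambda x: edges[x]          # x R y  iff  y in edges[x]
    FP = lambda x: [x.upper()]      # F'(x) seeds each member with one element
    return digraph(X, R, FP)        # -> F[x] == ['A', 'B', 'C'] for every x
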
class LALRError(YaccError):
    pass

# -----------------------------------------------------------------------------
#                             == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self, grammar, method='LALR', log=None):
        if method not in ['SLR', 'LALR']:
            raise LALRError('Unsupported method %s' % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}        # Action table
        self.lr_goto = {}          # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}    # Cache of computed gotos
        self.lr0_cidhash = {}      # Cache of closures

        self._add_count = 0        # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []        # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self, I):
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = True
        while didadd:
            didadd = False
            for j in J:
                for x in j.lr_after:
                    if getattr(x, 'lr0_added', 0) == self._add_count:
                        continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = True

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.  This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self, I, x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g

    # Compute the LR(0) sets of items function
    def lr0_items(self):
        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbol
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = {}
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I, x)
                if not g or id(g) in self.lr0_cidhash:
                    continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C

    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    #     McGraw-Hill Book Company, (1985).
    #
    # -----------------------------------------------------------------------------

    # -----------------------------------------------------------------------------
    # compute_nullable_nonterminals()
    #
    # Creates a set containing all of the non-terminals that might produce
    # an empty production.
    # -----------------------------------------------------------------------------

    def compute_nullable_nonterminals(self):
        nullable = set()
        num_nullable = 0
        while True:
            for p in self.grammar.Productions[1:]:
                if p.len == 0:
                    nullable.add(p.name)
                    continue
                for t in p.prod:
                    if t not in nullable:
                        break
                else:
                    nullable.add(p.name)
            if len(nullable) == num_nullable:
                break
            num_nullable = len(nullable)
        return nullable

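    # Illustrative sketch (not part of PLY): given the hypothetical rules
    #
    #     optargs :                    (an empty production, p.len == 0)
    #     args    : optargs
    #
    # the first pass marks optargs as nullable and the next pass marks args
    # as well, because every symbol on its right-hand side is already nullable.
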
    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions.  These are transitions in which a dot appears immediately before
    # a non-terminal.  Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------

    def find_nonterminal_transitions(self, C):
        trans = []
        for stateno, state in enumerate(C):
            for p in state:
                if p.lr_index < p.len - 1:
                    t = (stateno, p.prod[p.lr_index+1])
                    if t[1] in self.grammar.Nonterminals:
                        if t not in trans:
                            trans.append(t)
        return trans

    # -----------------------------------------------------------------------------
    # dr_relation()
    #
    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
    #
    # Returns a list of terminals.
    # -----------------------------------------------------------------------------

    def dr_relation(self, C, trans, nullable):
        state, N = trans
        terms = []

        g = self.lr0_goto(C[state], N)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index+1]
                if a in self.grammar.Terminals:
                    if a not in terms:
                        terms.append(a)

        # This extra bit is to handle the start state
        if state == 0 and N == self.grammar.Productions[0].prod[0]:
            terms.append('$end')

        return terms

    # -----------------------------------------------------------------------------
    # reads_relation()
    #
    # Computes the READS() relation (p,A) READS (t,C).
    # -----------------------------------------------------------------------------

    def reads_relation(self, C, trans, empty):
        # Look for empty transitions
        rel = []
        state, N = trans

        g = self.lr0_goto(C[state], N)
        j = self.lr0_cidhash.get(id(g), -1)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index + 1]
                if a in empty:
                    rel.append((j, a))

        return rel

    # -----------------------------------------------------------------------------
    # compute_lookback_includes()
    #
    # Determines the lookback and includes relations
    #
    # LOOKBACK:
    #
    # This relation is determined by running the LR(0) state machine forward.
    # For example, starting with a production "N : . A B C", we run it forward
    # to obtain "N : A B C ."  We then build a relationship between this final
    # state and the starting state.  These relationships are stored in a dictionary
    # lookdict.
    #
    # INCLUDES:
    #
    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
    #
    # This relation is used to determine non-terminal transitions that occur
    # inside of other non-terminal transition states.  (p,A) INCLUDES (p', B)
    # if the following holds:
    #
    #       B -> LAT, where T -> epsilon and p' -L-> p
    #
    # L is essentially a prefix (which may be empty), T is a suffix that must be
    # able to derive an empty string.  State p' must lead to state p with the string L.
    #
    # -----------------------------------------------------------------------------

    def compute_lookback_includes(self, C, trans, nullable):
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state, N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N:
                    continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j, t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation.
                        # The only way to know for certain is whether the rest of the
                        # production derives empty.

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals:
                                break      # No, forget it
                            if p.prod[li] not in nullable:
                                break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j, t))

                    g = self.lr0_goto(C[j], t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name:
                        continue
                    if r.len != p.len:
                        continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]:
                            break
                        i = i + 1
                    else:
                        lookb.append((j, r))
            for i in includes:
                if i not in includedict:
                    includedict[i] = []
                includedict[i].append((state, N))
            lookdict[(state, N)] = lookb

        return lookdict, includedict

    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs:  C        = Set of LR(0) items
    #          ntrans   = Set of nonterminal transitions
    #          nullable = Set of empty transitions
    #
    # Returns a set containing the read sets
    # -----------------------------------------------------------------------------

    def compute_read_sets(self, C, ntrans, nullable):
        FP = lambda x: self.dr_relation(C, x, nullable)
        R = lambda x: self.reads_relation(C, x, nullable)
        F = digraph(ntrans, R, FP)
        return F

    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
    #
    #     Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
    #
    # Inputs:
    #          ntrans   = Set of nonterminal transitions
    #          readsets = Readset (previously computed)
    #          inclsets = Include sets (previously computed)
    #
    # Returns a set containing the follow sets
    # -----------------------------------------------------------------------------

    def compute_follow_sets(self, ntrans, readsets, inclsets):
        FP = lambda x: readsets[x]
        R = lambda x: inclsets.get(x, [])
        F = digraph(ntrans, R, FP)
        return F

    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead symbols to grammar rules.
    #
    # Inputs:  lookbacks - Set of lookback relations
    #          followset - Computed follow set
    #
    # This function directly attaches the lookaheads to productions contained
    # in the lookbacks set
    # -----------------------------------------------------------------------------

    def add_lookaheads(self, lookbacks, followset):
        for trans, lb in lookbacks.items():
            # Loop over productions in lookback
            for state, p in lb:
                if state not in p.lookaheads:
                    p.lookaheads[state] = []
                f = followset.get(trans, [])
                for a in f:
                    if a not in p.lookaheads[state]:
                        p.lookaheads[state].append(a)

    # -----------------------------------------------------------------------------
    # add_lalr_lookaheads()
    #
    # This function does all of the work of adding lookahead information for use
    # with LALR parsing
    # -----------------------------------------------------------------------------

    def add_lalr_lookaheads(self, C):
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C, trans, nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C, trans, nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans, readsets, included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd, followsets)

    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        Productions = self.grammar.Productions
        Precedence = self.grammar.Precedence
        goto = self.lr_goto         # Goto array
        action = self.lr_action     # Action array
        log = self.log              # Logger for output

        actionp = {}                # Action production array (temporary)

        log.info('Parsing method: %s', self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = []              # List of actions
            st_action = {}
            st_actionp = {}
            st_goto = {}
            log.info('')
            log.info('state %d', st)
            log.info('')
            for p in I:
                log.info('    (%d) %s', p.number, p)
            log.info('')

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action['$end'] = 0
                        st_actionp['$end'] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa, we have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here.
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec, slevel = Productions[st_actionp[a].number].prec
                                    rprec, rlevel = Precedence.get(a, ('right', 0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict.  In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp, rejectp = pp, oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp, rejectp = oldp, pp
                                    self.rr_conflicts.append((st, chosenp, rejectp))
                                    log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                             a, st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I, a)
                        j = self.lr0_cidhash.get(id(g), -1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a, p, 'shift and go to state %d' % j))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa, we have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError('Shift/shift conflict in state %d' % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   - if precedence of reduce rule is higher, we reduce.
                                    #   - if precedence of reduce is same and left assoc, we reduce.
                                    #   - otherwise we shift
                                    rprec, rlevel = Productions[st_actionp[a].number].prec
                                    sprec, slevel = Precedence.get(a, ('right', 0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))

                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = {}
            for a, p, m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info('    %-15s %s', a, m)
                        _actprint[(a, m)] = 1
            log.info('')
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a, p, m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a, m) in _actprint:
                            log.debug('  ! %-15s [ %s ]', a, m)
                            not_used = 1
                            _actprint[(a, m)] = 1
            if not_used:
                log.debug('')

            # Construct the goto table for this state

            nkeys = {}
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I, n)
                j = self.lr0_cidhash.get(id(g), -1)
                if j >= 0:
                    st_goto[n] = j
                    log.info('    %-30s shift and go to state %d', n, j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1

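    # Illustrative sketch (not part of PLY): for a grammar containing
    # 'expr : expr MINUS expr' with precedence ('left', 'MINUS'), the
    # shift/reduce conflict on MINUS satisfies slevel == rlevel with 'left'
    # associativity, so the logic above resolves it as a reduce and the
    # resulting parser groups 'a - b - c' as '(a - b) - c'.
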
    # -----------------------------------------------------------------------------
    # write()
    #
    # This function writes the LR parsing tables to a file
    # -----------------------------------------------------------------------------

    def write_table(self, tabmodule, outputdir='', signature=''):
        if isinstance(tabmodule, types.ModuleType):
            raise IOError("Won't overwrite existing tabmodule")

        basemodulename = tabmodule.split('.')[-1]
        filename = os.path.join(outputdir, basemodulename) + '.py'
        try:
            f = open(filename, 'w')

            f.write('''
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller
            if smaller:
                items = {}

                for s, nd in self.lr_action.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write('\n_lr_action_items = {')
                for k, v in items.items():
                    f.write('%r:([' % k)
                    for i in v[0]:
                        f.write('%r,' % i)
                    f.write('],[')
                    for i in v[1]:
                        f.write('%r,' % i)

                    f.write(']),')
                f.write('}\n')

                f.write('''
_lr_action = {}
for _k, _v in _lr_action_items.items():
   for _x, _y in zip(_v[0], _v[1]):
      if not _x in _lr_action: _lr_action[_x] = {}
      _lr_action[_x][_k] = _y
del _lr_action_items
''')

            else:
                f.write('\n_lr_action = { ')
                for k, v in self.lr_action.items():
                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
                f.write('}\n')

            if smaller:
                # Factor out names to try and make smaller
                items = {}

                for s, nd in self.lr_goto.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write('\n_lr_goto_items = {')
                for k, v in items.items():
                    f.write('%r:([' % k)
                    for i in v[0]:
                        f.write('%r,' % i)
                    f.write('],[')
                    for i in v[1]:
                        f.write('%r,' % i)

                    f.write(']),')
                f.write('}\n')

                f.write('''
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
   for _x, _y in zip(_v[0], _v[1]):
      if not _x in _lr_goto: _lr_goto[_x] = {}
      _lr_goto[_x][_k] = _y
del _lr_goto_items
''')
            else:
                f.write('\n_lr_goto = { ')
                for k, v in self.lr_goto.items():
                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
                f.write('}\n')

            # Write production table
            f.write('_lr_productions = [\n')
            for p in self.lr_productions:
                if p.func:
                    f.write('  (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
                                                          p.func, os.path.basename(p.file), p.line))
                else:
                    f.write('  (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
            f.write(']\n')
            f.close()

        except IOError:
            raise


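    # Illustrative sketch (not part of PLY): the 'smaller' encoding stores one
    # entry per symbol name rather than one per (state, symbol) pair.  A
    # hypothetical fragment of the generated file:
    #
    #     _lr_action_items = {'PLUS':([0,2,],[5,-1,]),}
    #
    # which the emitted loop expands back into
    #
    #     _lr_action = {0: {'PLUS': 5}, 2: {'PLUS': -1}}
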
    # -----------------------------------------------------------------------------
    # pickle_table()
    #
    # This function pickles the LR parsing tables to a file given by filename
    # -----------------------------------------------------------------------------

    def pickle_table(self, filename, signature=''):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        with open(filename, 'wb') as outf:
            pickle.dump(__tabversion__, outf, pickle_protocol)
            pickle.dump(self.lr_method, outf, pickle_protocol)
            pickle.dump(signature, outf, pickle_protocol)
            pickle.dump(self.lr_action, outf, pickle_protocol)
            pickle.dump(self.lr_goto, outf, pickle_protocol)

            outp = []
            for p in self.lr_productions:
                if p.func:
                    outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
                else:
                    outp.append((str(p), p.name, p.len, None, None, None))
            pickle.dump(outp, outf, pickle_protocol)

# -----------------------------------------------------------------------------
#                          === INTROSPECTION ===
#
# The following functions and classes are used to implement the PLY
# introspection features followed by the yacc() function itself.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack.  This is used to get the environment
# associated with the yacc() call if none was provided.
# -----------------------------------------------------------------------------

def get_caller_module_dict(levels):
    f = sys._getframe(levels)
    ldict = f.f_globals.copy()
    if f.f_globals != f.f_locals:
        ldict.update(f.f_locals)
    return ldict

# -----------------------------------------------------------------------------
# parse_grammar()
#
# This takes a raw grammar rule string and parses it into production data
# -----------------------------------------------------------------------------
def parse_grammar(doc, file, line):
    grammar = []
    # Split the doc string into lines
    pstrings = doc.splitlines()
    lastp = None
    dline = line
    for ps in pstrings:
        dline += 1
        p = ps.split()
        if not p:
            continue
        try:
            if p[0] == '|':
                # This is a continuation of a previous rule
                if not lastp:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
                prodname = lastp
                syms = p[1:]
            else:
                prodname = p[0]
                lastp = prodname
                syms = p[2:]
                assign = p[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))

            grammar.append((file, dline, prodname, syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))

    return grammar

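# Illustrative sketch (not part of PLY's API): parse_grammar() turning the
# docstring of a p_* rule function into (file, line, prodname, syms) tuples.
# The docstring content and file name below are hypothetical.
def _parse_grammar_example():
    doc = '''expr : expr PLUS term
             | term'''
    return parse_grammar(doc, 'example.py', 10)
    # -> [('example.py', 11, 'expr', ['expr', 'PLUS', 'term']),
    #     ('example.py', 12, 'expr', ['term'])]
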
# -----------------------------------------------------------------------------
# ParserReflect()
#
# This class represents information extracted for building a parser including
# start symbol, error function, tokens, precedence list, action functions,
# etc.
# -----------------------------------------------------------------------------
class ParserReflect(object):
    def __init__(self, pdict, log=None):
        self.pdict = pdict
        self.start = None
        self.error_func = None
        self.tokens = None
        self.modules = set()
        self.grammar = []
        self.error = False

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_modules()
        return self.error

2941 | # Compute a signature over the grammar |
||
2942 | def signature(self): |
||
2943 | try: |
||
2944 | from hashlib import md5 |
||
2945 | except ImportError: |
||
2946 | from md5 import md5 |
||
2947 | try: |
||
2948 | sig = md5() |
||
2949 | if self.start: |
||
2950 | sig.update(self.start.encode('latin-1')) |
||
2951 | if self.prec: |
||
2952 | sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1')) |
||
2953 | if self.tokens: |
||
2954 | sig.update(' '.join(self.tokens).encode('latin-1')) |
||
2955 | for f in self.pfuncs: |
||
2956 | if f[3]: |
||
2957 | sig.update(f[3].encode('latin-1')) |
||
2958 | except (TypeError, ValueError): |
||
2959 | pass |
||
2960 | |||
2961 | digest = base64.b16encode(sig.digest()) |
||
2962 | if sys.version_info[0] >= 3: |
||
2963 | digest = digest.decode('latin-1') |
||
2964 | return digest |
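
    # Note (explanatory, not from the original source): the digest is an MD5
    # over the start symbol, precedence table, token list, and rule
    # docstrings, rendered by base64.b16encode as a 32-character hex string.
    # yacc() below compares it with the signature stored in a previously
    # generated table file to decide whether the cached tables can be reused.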

    # -----------------------------------------------------------------------------
    # validate_modules()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_modules(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for module in self.modules:
            lines, linen = inspect.getsourcelines(module)

            counthash = {}
            for linen, line in enumerate(lines):
                linen += 1
                m = fre.match(line)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        filename = inspect.getsourcefile(module)
                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
                                         filename, linen, name, prev)
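
    # Example of the cut-and-paste mistake this catches (hypothetical user
    # code): defining p_statement twice in one module,
    #
    #     def p_statement(p): ...
    #     def p_statement(p): ...   # silently replaces the first definition
    #
    # yields a warning of the form
    #     calc.py:42: Function p_statement redefined. Previously defined on line 17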

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start, string_types):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        if self.error_func:
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = True
                return

            eline = self.error_func.__code__.co_firstlineno
            efile = self.error_func.__code__.co_filename
            module = inspect.getmodule(self.error_func)
            self.modules.add(module)

            argcount = self.error_func.__code__.co_argcount - ismethod
            if argcount != 1:
                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
                self.error = True
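
    # A minimal p_error() that passes this check (illustrative sketch of user
    # code, not part of PLY): a single argument receiving the offending token,
    # or None when the error occurs at end-of-input.
    #
    #     def p_error(tok):
    #         if tok:
    #             print('Syntax error at token', tok.type)
    #         else:
    #             print('Syntax error at EOF')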

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get('tokens')
        if not tokens:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            self.log.error('tokens is empty')
            self.error = True
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = True
            return

        terminals = set()
        for n in self.tokens:
            if n in terminals:
                self.log.warning('Token %r multiply defined', n)
            terminals.add(n)
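
    # Typical shape of the value being validated (hypothetical user code):
    #
    #     tokens = ('NUMBER', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE')
    #
    # Duplicate names only draw a warning, but 'error' is rejected outright
    # because it is reserved for error-recovery productions.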

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get('precedence')

    # Validate and parse the precedence map
    def validate_precedence(self):
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error('precedence must be a list or tuple')
                self.error = True
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error('Bad precedence table')
                    self.error = True
                    return

                if len(p) < 2:
                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
                    self.error = True
                    return
                assoc = p[0]
                if not isinstance(assoc, string_types):
                    self.log.error('precedence associativity must be a string')
                    self.error = True
                    return
                for term in p[1:]:
                    if not isinstance(term, string_types):
                        self.log.error('precedence items must be strings')
                        self.error = True
                        return
                    preclist.append((term, assoc, level+1))
        self.preclist = preclist
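
    # Example (hypothetical user code): a calculator-style table such as
    #
    #     precedence = (
    #         ('left', 'PLUS', 'MINUS'),
    #         ('left', 'TIMES', 'DIVIDE'),
    #     )
    #
    # is flattened into (term, assoc, level) entries, with levels numbered
    # from 1 upward:
    #
    #     [('PLUS', 'left', 1), ('MINUS', 'left', 1),
    #      ('TIMES', 'left', 2), ('DIVIDE', 'left', 2)]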

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if not name.startswith('p_') or name == 'p_error':
                continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                line = item.__code__.co_firstlineno
                module = inspect.getmodule(item)
                p_functions.append((line, module, name, item.__doc__))

        # Sort all of the actions by line number; make sure to stringify
        # modules to make them sortable, since `line` may not uniquely sort all
        # p functions
        p_functions.sort(key=lambda p_function: (
            p_function[0],
            str(p_function[1]),
            p_function[2],
            p_function[3]))
        self.pfuncs = p_functions
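
    # Illustrative example (hypothetical user code): an action function such as
    #
    #     def p_expression(p):
    #         'expression : expression PLUS term'
    #         p[0] = p[1] + p[3]
    #
    # defined on line 30 of calc.py is collected as the tuple
    # (30, <module 'calc'>, 'p_expression', 'expression : expression PLUS term').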

    # Validate all of the p_functions
    def validate_pfunctions(self):
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error('no rules of the form p_rulename are defined')
            self.error = True
            return

        for line, module, name, doc in self.pfuncs:
            file = inspect.getsourcefile(module)
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func.__code__.co_argcount > reqargs:
                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
                self.error = True
            elif func.__code__.co_argcount < reqargs:
                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
                self.error = True
            elif not func.__doc__:
                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                 file, line, func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc, file, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError as e:
                    self.log.error(str(e))
                    self.error = True

                # Looks like a valid grammar rule
                # Mark the file in which it is defined
                self.modules.add(module)

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.
        for n, v in self.pdict.items():
            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                continue
            if n.startswith('t_'):
                continue
            if n.startswith('p_') and n != 'p_error':
                self.log.warning('%r not defined as a function', n)
            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
                    (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
                if v.__doc__:
                    try:
                        doc = v.__doc__.split(' ')
                        if doc[1] == ':':
                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
                    except IndexError:
                        pass

        self.grammar = grammar
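
    # Example of what the secondary check flags (hypothetical user code): a
    # one-argument function that looks like a rule but lacks the p_ prefix,
    #
    #     def expression(p):
    #         'expression : expression PLUS term'
    #
    # is reported as a possible grammar rule, because its docstring splits
    # on spaces into ['expression', ':', ...].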

# -----------------------------------------------------------------------------
# yacc(module)
#
# Build a parser
# -----------------------------------------------------------------------------

def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
         outputdir=None, debuglog=None, errorlog=None, picklefile=None):

    if tabmodule is None:
        tabmodule = tab_module

    # Reference to the parsing method of the last built parser
    global parse

    # If pickling is enabled, table files are not created
    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        pdict = dict(_items)
        # If no __file__ attribute is available, try to obtain it from the __module__ instead
        if '__file__' not in pdict:
            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
    else:
        pdict = get_caller_module_dict(2)

    if outputdir is None:
        # If no output directory is set, the location of the output files
        # is determined according to the following rules:
        #     - If tabmodule specifies a package, files go into that package directory
        #     - Otherwise, files go in the same directory as the specifying module
        if isinstance(tabmodule, types.ModuleType):
            srcfile = tabmodule.__file__
        else:
            if '.' not in tabmodule:
                srcfile = pdict['__file__']
            else:
                parts = tabmodule.split('.')
                pkgname = '.'.join(parts[:-1])
                exec('import %s' % pkgname)
                srcfile = getattr(sys.modules[pkgname], '__file__', '')
        outputdir = os.path.dirname(srcfile)

    # Determine if the module is part of a package.
    # If so, fix the tabmodule setting so that tables load correctly
    pkg = pdict.get('__package__')
    if pkg and isinstance(tabmodule, str):
        if '.' not in tabmodule:
            tabmodule = pkg + '.' + tabmodule

    # Set start symbol if it's specified directly using an argument
    if start is not None:
        pdict['start'] = start

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError('Unable to build parser')

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr, pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception as e:
                errorlog.warning('There was a problem loading the table file: %r', e)
    except VersionError as e:
        errorlog.warning(str(e))
    except ImportError:
        pass

    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
            except IOError as e:
                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError('Unable to build parser')

    if not pinfo.error_func:
        errorlog.warning('no p_error() function is defined')

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning('%s', e)

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error('%s', e)
            errors = True

    # Set the grammar start symbol
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info('')
        debuglog.info('Unused terminals:')
        debuglog.info('')
        for term in unused_terminals:
            errorlog.warning('Token %r defined, but not used', term)
            debuglog.info('    %s', term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info('')
        debuglog.info('Grammar')
        debuglog.info('')
        for n, p in enumerate(grammar.Productions):
            debuglog.info('Rule %-5d %s', n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning('There is 1 unused token')
    if len(unused_terminals) > 1:
        errorlog.warning('There are %d unused tokens', len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
        errorlog.warning('There are %d unused rules', len(unused_rules))

    if debug:
        debuglog.info('')
        debuglog.info('Terminals, with rules where they appear')
        debuglog.info('')
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info('')
        debuglog.info('Nonterminals, with rules where they appear')
        debuglog.info('')
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info('')

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error('Infinite recursion detected for symbol %r', inf)
            errors = True

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug('Generating %s tables', method)

    lr = LRGeneratedTable(grammar, method, debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning('1 shift/reduce conflict')
        elif num_sr > 1:
            errorlog.warning('%d shift/reduce conflicts', num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning('1 reduce/reduce conflict')
        elif num_rr > 1:
            errorlog.warning('%d reduce/reduce conflicts', num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning('')
        debuglog.warning('Conflicts:')
        debuglog.warning('')

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)

        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning('Rule (%s) is never reduced', rejected)
                errorlog.warning('Rule (%s) is never reduced', rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        try:
            lr.write_table(tabmodule, outputdir, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))

    # Write a pickled version of the tables
    if picklefile:
        try:
            lr.pickle_table(picklefile, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser
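
# Typical usage of yacc() (illustrative sketch of user code, not part of this
# module; the lexer module name and grammar are hypothetical):
#
#     import ply.yacc as yacc
#     from calclex import tokens        # token list from a ply.lex module
#
#     def p_expression_plus(p):
#         'expression : expression PLUS expression'
#         p[0] = p[1] + p[3]
#
#     # ... further p_ rules and a p_error() handler ...
#
#     parser = yacc.yacc()              # builds the tables, or reuses cached ones
#     result = parser.parse('2 + 3')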