# -----------------------------------------------------------------------------
# Copyright (C) 2001-2015,
# David M. Beazley (Dabeaz LLC)
# SPDX-License-Identifier: BSD-3-Clause
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman, "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style." Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import os.path
import warnings
__tabversion__ = '3.8'

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = True             # Debugging mode. If set, yacc generates a
                             # 'parser.out' file in the current directory

debug_file = 'parser.out'    # Default name of the debugging file
tab_module = 'parsetab'      # Default name of the table module
default_lr = 'LALR'          # Default LR table generation method

error_count = 3              # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False            # Set to True if developing yacc. This turns off optimized
                             # implementations of certain functions.

resultlimit = 40             # Size limit of results when running in debug mode.

pickle_protocol = 0          # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize         # Used as an 'infinity' marker by the digraph traversal below
# This object is a stand-in for a logging object created by the
# logging module. PLY will use this by default to create things
# such as the parser.out file. If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug
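
# A minimal usage sketch (assuming a yacc() entry point like PLY's, which
# accepts debuglog= and errorlog= arguments): route all of PLY's diagnostic
# output to stderr by passing in a PlyLogger instance:
#
#     import sys
#     log = PlyLogger(sys.stderr)
#     parser = yacc.yacc(debuglog=log, errorlog=log)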
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass
# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))
# Panic mode error recovery support. This feature is being reworked--much of the
# code here is to offer a deprecation/backwards compatible transition

_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r
#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself. These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)
# This class is a wrapper around the objects actually passed to each
# grammar rule. Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined). The linespan() method returns
# a tuple of (startline, endline) representing the range of lines
# for a symbol. The lexspan() method returns a tuple (lexpos, endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos
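
# As an illustration (hypothetical rule and token names), the 'p' argument
# received by a grammar rule function is a YaccProduction; note that line
# information is only available if the lexer sets token line numbers:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]        # index lookup/assignment reads/writes .value
#         span = p.linespan(1)      # (startline, endline) of the 'expr' symbol
#         pos = p.lexspan(3)        # (lexpos, endlexpos) of the 'term' symbol
#         line = p.lineno(2)        # line number of the PLUS token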
# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)
    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}
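
    # As an illustration (hypothetical table data): if the action table were
    #
    #     self.action = {0: {'NUMBER': 1}, 1: {'PLUS': -2}}
    #
    # then state 1 has exactly one possible action and it is a reduction
    # (negative entries denote reductions), so set_defaulted_states() records
    # {1: -2} and the parser can reduce in state 1 without fetching the next
    # token first.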
    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
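
    # Typical call patterns (a usage sketch; 'data' and 'lexer' come from the
    # surrounding application):
    #
    #     result = parser.parse(data, lexer=lexer)                  # fastest path
    #     result = parser.parse(data, lexer=lexer, tracking=True)   # track position spans
    #     result = parser.parse(data, lexer=lexer, debug=True)      # log every parser step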
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse(). All changes made to the
    # parsing engine should be made here. Optimized versions of this function
    # are automatically created by the ply/ygen.py script. This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        debug.info('PLY: PARSE DEBUG START')
        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            debug.debug('State : %s', state)

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)

            debug.debug('Stack : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    debug.debug('Action : Shift and goto state %s', t)

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue
                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            debug.info('Result : %s', format_result(pslice[0]))
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    else:

                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            debug.info('Result : %s', format_result(pslice[0]))
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)

                    debug.info('Done : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')

                    return result

            if t is None:

                debug.error('Error : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return
                else:
                    errorcount = error_count
                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parsedebug-end
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script. Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery
        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue
                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    else:

                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result
            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return
                else:
                    errorcount = error_count
                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-end
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery
        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue
                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result
            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return
                else:
                    errorcount = error_count
                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-notrack-end
# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production. For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction
        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
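
# As an illustration (hypothetical rule): a grammar function such as
#
#     def p_expr(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#
# is recorded as a Production with name='expr', prod=('expr', 'PLUS', 'term'),
# len=3, usyms=['expr', 'PLUS', 'term'], and func='p_expr'. bind() later
# resolves that function name back to the callable invoked on reduce.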
# This class serves as a minimal stand-in for Production objects when
# reading table data from files. It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule. For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse. Here are
# the basic attributes:
#
#       name       - Name of the production. For example 'expr'
#       prod       - A list of symbols on the right side ['expr', '.', 'PLUS', 'term']
#       number     - Production number.
#       lr_next    - Next LR item. For example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, '.')
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            s = '%s -> <empty>' % self.name
        return s

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'
# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols. Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]   # A list of all of the productions. The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames = {}         # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap = {}           # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals = {}         # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First = {}             # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}            # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}        # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]
    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left', 'right', or 'nonassoc'. level is a numeric level.
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)
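
    # For example (a usage sketch with hypothetical token names), giving TIMES
    # a higher precedence level than PLUS:
    #
    #     g = Grammar(['PLUS', 'TIMES', 'NUMBER'])
    #     g.set_precedence('PLUS', 'left', 1)
    #     g.set_precedence('TIMES', 'left', 2)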
    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols. For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most non-terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if prodprec is None:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid. Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
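
    # For example (hypothetical symbols), a unary-minus rule can borrow the
    # precedence of a UMINUS pseudo-token instead of its rightmost terminal:
    #
    #     g.set_precedence('UMINUS', 'right', 3)
    #     g.add_production('expr', ['MINUS', 'expr', '%prec', 'UMINUS'],
    #                      func='p_expr_uminus')
    #
    # The trailing ['%prec', 'UMINUS'] pair is stripped from the rule and the
    # production's precedence becomes ('right', 3).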
    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar. Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start
    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol. Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]
    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p:
                continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    result.append((s, p))
        return result
    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar. Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok
    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------
    def unused_rules(self):
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod
    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term, precedence) corresponding to precedence
    # rules that were never used by the grammar. term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname, self.Precedence[termname][0]))

        return unused
    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first(), the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self, beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = False

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = True
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result
    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while True:
            some_change = False
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = True
            if not some_change:
                break

        return self.First
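
    # A small worked example (hypothetical grammar): with terminals NUMBER,
    # LPAREN, and RPAREN and the rules
    #
    #     factor : NUMBER
    #     factor : LPAREN expr RPAREN
    #     expr   : factor
    #
    # the fixed point yields FIRST(factor) = FIRST(expr) = ['NUMBER', 'LPAREN'],
    # since every derivation of either nonterminal must begin with one of those
    # two tokens.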
    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol. The
    # follow set is the set of all symbols that might follow a given
    # non-terminal. See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = ['$end']

        while True:
            didadd = False
            for p in self.Productions[1:]:
                # Here is the production set
                for i, B in enumerate(p.prod):
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = False
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
                            if f == '<empty>':
                                hasempty = True
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = True
            if not didadd:
                break
        return self.Follow
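
    # Continuing the hypothetical grammar above (with 'expr' as the start
    # symbol, so '$end' seeds FOLLOW(expr)): because 'expr' appears in
    # 'LPAREN expr RPAREN', RPAREN lands in FOLLOW(expr); and because
    # 'expr : factor' ends with 'factor', all of FOLLOW(expr) (here '$end'
    # and RPAREN) is copied into FOLLOW(factor).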
    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items. The LR items are stored in two ways: First, they are uniquely
    # numbered and placed in the list _lritems. Second, a linked list of LR items
    # is built for each production. For example:
    #
    #     E -> E PLUS E
    #
    # creates the list
    #
    #     [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while True:
                if i > p.len:
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here. They are defined
# in the derived class LRGeneratedTable.
# -----------------------------------------------------------------------------

class VersionError(YaccError):
    pass

class LRTable(object):
    def __init__(self):
        self.lr_action = None
        self.lr_goto = None
        self.lr_productions = None
        self.lr_method = None

    def read_table(self, module):
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            exec('import %s' % module)
            parsetab = sys.modules[module]

        if parsetab._tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature
    def read_pickle(self, filename):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        if not os.path.exists(filename):
            raise ImportError

        in_f = open(filename, 'rb')

        tabversion = pickle.load(in_f)
        if tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')
        self.lr_method = pickle.load(in_f)
        signature = pickle.load(in_f)
        self.lr_action = pickle.load(in_f)
        self.lr_goto = pickle.load(in_f)
        productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        in_f.close()
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)
# -----------------------------------------------------------------------------
#                           === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------
def digraph(X, R, FP):
    N = {}
    for x in X:
        N[x] = 0
    stack = []
    F = {}
    for x in X:
        if N[x] == 0:
            traverse(x, N, stack, F, X, R, FP)
    return F

def traverse(x, N, stack, F, X, R, FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
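
# A tiny usage sketch (toy data, not from the generator): compute
# F(x) = F'(x) U F(y) for all y with x R y, over X = {1, 2}:
#
#     X = [1, 2]
#     R = lambda x: [2] if x == 1 else []   # the only relation is 1 R 2
#     FP = lambda x: [x * 10]               # F'(1) = [10], F'(2) = [20]
#     digraph(X, R, FP)                     # returns {1: [10, 20], 2: [20]}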
class LALRError(YaccError):
    pass

# -----------------------------------------------------------------------------
#                             == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm. There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self, grammar, method='LALR', log=None):
        if method not in ['SLR', 'LALR']:
            raise LALRError('Unsupported method %s' % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}          # Action table
        self.lr_goto = {}            # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}      # Cache of computed gotos
        self.lr0_cidhash = {}        # Cache of closures

        self._add_count = 0          # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []          # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self, I):
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = True
        while didadd:
            didadd = False
            for j in J:
                for x in j.lr_after:
                    if getattr(x, 'lr0_added', 0) == self._add_count:
                        continue
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = True

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol. This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects). With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self, I, x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g
    # Compute the LR(0) sets of item function
    def lr0_items(self):
        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbol
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = {}
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I, x)
                if not g or id(g) in self.lr0_cidhash:
                    continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C
    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    # Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    #  McGraw-Hill Book Company, (1985).
    # -----------------------------------------------------------------------------
    # -----------------------------------------------------------------------------
    # compute_nullable_nonterminals()
    #
    # Creates a dictionary containing all of the non-terminals that might produce
    # an empty production.
    # -----------------------------------------------------------------------------

    def compute_nullable_nonterminals(self):
        nullable = set()
        num_nullable = 0
        while True:
            for p in self.grammar.Productions[1:]:
                if p.len == 0:
                    nullable.add(p.name)
                    continue
                for t in p.prod:
                    if t not in nullable:
                        break
                else:
                    nullable.add(p.name)
            if len(nullable) == num_nullable:
                break
            num_nullable = len(nullable)
        return nullable
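
    # For instance (hypothetical rules), given
    #
    #     opt_sign : PLUS
    #     opt_sign : <empty>
    #     number   : opt_sign DIGITS
    #
    # the first pass adds 'opt_sign' (it has a zero-length production), while
    # 'number' is never added because DIGITS is a terminal and can never
    # become nullable.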
    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions. These are transitions in which a dot appears immediately before
    # a non-terminal. Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------

    def find_nonterminal_transitions(self, C):
        trans = []
        for stateno, state in enumerate(C):
            for p in state:
                if p.lr_index < p.len - 1:
                    t = (stateno, p.prod[p.lr_index+1])
                    if t[1] in self.grammar.Nonterminals:
                        if t not in trans:
                            trans.append(t)
        return trans
    # -----------------------------------------------------------------------------
    # dr_relation()
    #
    # Computes the DR(p,A) relationships for non-terminal transitions. The input
    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
    #
    # Returns a list of terminals.
    # -----------------------------------------------------------------------------

    def dr_relation(self, C, trans, nullable):
        state, N = trans
        terms = []

        g = self.lr0_goto(C[state], N)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index+1]
                if a in self.grammar.Terminals:
                    if a not in terms:
                        terms.append(a)

        # This extra bit is to handle the start state
        if state == 0 and N == self.grammar.Productions[0].prod[0]:
            terms.append('$end')

        return terms
2281 # -----------------------------------------------------------------------------
2282 # reads_relation()
2284 # Computes the READS() relation (p,A) READS (t,C).
2285 # -----------------------------------------------------------------------------
2287 def reads_relation(self, C, trans, empty):
2288 # Look for empty transitions
2292 g = self.lr0_goto(C[state], N)
2293 j = self.lr0_cidhash.get(id(g), -1)
2295 if p.lr_index < p.len - 1:
2296 a = p.prod[p.lr_index + 1]
2302 # -----------------------------------------------------------------------------
2303 # compute_lookback_includes()
2305 # Determines the lookback and includes relations
2309 # This relation is determined by running the LR(0) state machine forward.
2310 # For example, starting with a production "N : . A B C", we run it forward
2311 # to obtain "N : A B C ." We then build a relationship between this final
2312 # state and the starting state. These relationships are stored in the dictionary lookdict.
2317 # Computes the INCLUDES() relation (p,A) INCLUDES (p',B).
2319 # This relation is used to determine non-terminal transitions that occur
2320 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2321 # if the following holds:
2323 # B -> LAT, where T -> epsilon and p' -L-> p
2325 # L is essentially a prefix (which may be empty), T is a suffix that must be
2326 # able to derive an empty string. State p' must lead to state p with the string L.
2328 # -----------------------------------------------------------------------------
2330 def compute_lookback_includes(self, C, trans, nullable):
2331 lookdict = {} # Dictionary of lookback relations
2332 includedict = {} # Dictionary of include relations
2334 # Make a dictionary of non-terminal transitions
2339 # Loop over all transitions and compute lookbacks and includes
2340 for state, N in trans:
2347 # Okay, we have a name match. We now follow the production all the way
2348 # through the state machine until the dot reaches the right-hand end
2350 lr_index = p.lr_index
2352 while lr_index < p.len - 1:
2353 lr_index = lr_index + 1
2354 t = p.prod[lr_index]
2356 # Check to see if this symbol and state are a non-terminal transition
2357 if (j, t) in dtrans:
2358 # Yes. Okay, there is some chance that this is an includes
2359 # relation. The only way to know for certain is to check whether
2360 # the rest of the production derives empty (see the sketch after this function)
2364 if p.prod[li] in self.grammar.Terminals:
2365 break # No, forget it
2366 if p.prod[li] not in nullable:
2370 # Appears to be a relation between (j,t) and (state,N)
2371 includes.append((j, t))
2373 g = self.lr0_goto(C[j], t) # Go to next set
2374 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2376 # When we get here, j is the final state; now we have to locate the production
2378 if r.name != p.name:
2383 # This loop is comparing a production ". A B C" with "A B C ."
2384 while i < r.lr_index:
2385 if r.prod[i] != p.prod[i+1]:
2389 lookb.append((j, r))
2390 for i in includes:
2391     if i not in includedict:
2392         includedict[i] = []
2393     includedict[i].append((state, N))
2394 lookdict[(state, N)] = lookb
2396 return lookdict, includedict
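# The "rest of the production derives empty" test used above is the T in
# the INCLUDES condition B -> LAT described in the banner comment. A
# self-contained sketch of that test (hypothetical helper, not PLY API):
def _suffix_derives_empty(rhs, pos, nullable, terminals):
    # True if every symbol after rhs[pos] can derive the empty string.
    # Any terminal, or any non-nullable nonterminal, kills the relation.
    for sym in rhs[pos + 1:]:
        if sym in terminals or sym not in nullable:
            return False
    return True
# _suffix_derives_empty(['L', 'A', 'T'], 1, {'T'}, set()) -> True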
2398 # -----------------------------------------------------------------------------
2399 # compute_read_sets()
2401 # Given a set of LR(0) items, this function computes the read sets.
2403 # Inputs: C = Set of LR(0) items
2404 # ntrans = Set of nonterminal transitions
2405 # nullable = Set of nullable non-terminals
2407 # Returns a dictionary containing the read sets, keyed by transition
2408 # -----------------------------------------------------------------------------
2410 def compute_read_sets(self, C, ntrans, nullable):
2411 FP = lambda x: self.dr_relation(C, x, nullable)
2412 R = lambda x: self.reads_relation(C, x, nullable)
2413 F = digraph(ntrans, R, FP)
2416 # -----------------------------------------------------------------------------
2417 # compute_follow_sets()
2419 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2420 # and an include set, this function computes the follow sets
2422 # Follow(p,A) = Read(p,A) U union{ Follow(p',B) | (p,A) INCLUDES (p',B) }
2425 # ntrans = Set of nonterminal transitions
2426 # readsets = Readset (previously computed)
2427 # inclsets = Include sets (previously computed)
2429 # Returns a dictionary containing the follow sets, keyed by transition
2430 # -----------------------------------------------------------------------------
2432 def compute_follow_sets(self, ntrans, readsets, inclsets):
2433 FP = lambda x: readsets[x]
2434 R = lambda x: inclsets.get(x, [])
2435 F = digraph(ntrans, R, FP)
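# The digraph() call above is just an efficient way of solving the
# Follow equation. For intuition, a naive fixed-point version of the
# same computation (illustrative only, and much slower on big grammars):
def _follow_fixpoint_sketch(ntrans, readsets, inclsets):
    follow = dict((t, list(readsets[t])) for t in ntrans)
    changed = True
    while changed:
        changed = False
        for t in ntrans:
            # Absorb the follow sets of every transition related to t
            for t2 in inclsets.get(t, []):
                for a in follow[t2]:
                    if a not in follow[t]:
                        follow[t].append(a)
                        changed = True
    return follow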
2438 # -----------------------------------------------------------------------------
2439 # add_lookaheads()
2441 # Attaches the lookahead symbols to grammar rules.
2443 # Inputs: lookbacks - Set of lookback relations
2444 # followset - Computed follow set
2446 # This function directly attaches the lookaheads to productions contained
2447 # in the lookbacks set
2448 # -----------------------------------------------------------------------------
2450 def add_lookaheads(self, lookbacks, followset):
2451 for trans, lb in lookbacks.items():
2452 # Loop over productions in lookback
2454 if state not in p.lookaheads:
2455 p.lookaheads[state] = []
2456 f = followset.get(trans, [])
2458 if a not in p.lookaheads[state]:
2459 p.lookaheads[state].append(a)
2461 # -----------------------------------------------------------------------------
2462 # add_lalr_lookaheads()
2464 # This function does all of the work of adding lookahead information for use
2465 # with LALR parsing
2466 # -----------------------------------------------------------------------------
2468 def add_lalr_lookaheads(self, C):
2469 # Determine all of the nullable nonterminals
2470 nullable = self.compute_nullable_nonterminals()
2472 # Find all non-terminal transitions
2473 trans = self.find_nonterminal_transitions(C)
2476 readsets = self.compute_read_sets(C, trans, nullable)
2478 # Compute lookback/includes relations
2479 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2481 # Compute LALR FOLLOW sets
2482 followsets = self.compute_follow_sets(trans, readsets, included)
2484 # Add all of the lookaheads
2485 self.add_lookaheads(lookd, followsets)
2487 # -----------------------------------------------------------------------------
2488 # lr_parse_table()
2490 # This function constructs the parse tables for SLR or LALR
2491 # -----------------------------------------------------------------------------
2492 def lr_parse_table(self):
2493 Productions = self.grammar.Productions
2494 Precedence = self.grammar.Precedence
2495 goto = self.lr_goto # Goto array
2496 action = self.lr_action # Action array
2497 log = self.log # Logger for output
2499 actionp = {} # Action production array (temporary)
2501 log.info('Parsing method: %s', self.lr_method)
2503 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2504 # This determines the number of states
2506 C = self.lr0_items()
2508 if self.lr_method == 'LALR':
2509 self.add_lalr_lookaheads(C)
2511 # Build the parser table, state by state
2514 # Loop over each production in I
2515 actlist = [] # List of actions
2520 log.info('state %d', st)
2523 log.info(' (%d) %s', p.number, p)
2527 if p.len == p.lr_index + 1:
2529 # Start symbol. Accept!
2530 st_action['$end'] = 0
2531 st_actionp['$end'] = p
2533 # We are at the end of a production. Reduce!
2534 if self.lr_method == 'LALR':
2535 laheads = p.lookaheads[st]
2537 laheads = self.grammar.Follow[p.name]
2539 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
2540 r = st_action.get(a)
2542 # Whoa. Have a shift/reduce or reduce/reduce conflict
2544 # Need to decide on shift or reduce here
2545 # By default we favor shifting. Need to add
2546 # some precedence rules here.
2547 sprec, slevel = Productions[st_actionp[a].number].prec
2548 rprec, rlevel = Precedence.get(a, ('right', 0))
2549 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2550 # We really need to reduce here.
2551 st_action[a] = -p.number
2553 if not slevel and not rlevel:
2554 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2555 self.sr_conflicts.append((st, a, 'reduce'))
2556 Productions[p.number].reduced += 1
2557 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2560 # Hmmm. Guess we'll keep the shift
2562 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2563 self.sr_conflicts.append((st, a, 'shift'))
2565 # Reduce/reduce conflict. In this case, we favor the rule
2566 # that was defined first in the grammar file
2567 oldp = Productions[-r]
2568 pp = Productions[p.number]
2569 if oldp.line > pp.line:
2570 st_action[a] = -p.number
2572 chosenp, rejectp = pp, oldp
2573 Productions[p.number].reduced += 1
2574 Productions[oldp.number].reduced -= 1
2576 chosenp, rejectp = oldp, pp
2577 self.rr_conflicts.append((st, chosenp, rejectp))
2578 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
2579 a, st_actionp[a].number, st_actionp[a])
2581 raise LALRError('Unknown conflict in state %d' % st)
2583 st_action[a] = -p.number
2585 Productions[p.number].reduced += 1
2588 a = p.prod[i+1] # Get symbol right after the "."
2589 if a in self.grammar.Terminals:
2590 g = self.lr0_goto(I, a)
2591 j = self.lr0_cidhash.get(id(g), -1)
2593 # We are in a shift state
2594 actlist.append((a, p, 'shift and go to state %d' % j))
2595 r = st_action.get(a)
2597 # Whoa. Have a shift/reduce or shift/shift conflict
2600 raise LALRError('Shift/shift conflict in state %d' % st)
2602 # Do a precedence check.
2603 # - if precedence of reduce rule is higher, we reduce.
2604 # - if precedence of reduce is same and left assoc, we reduce.
2605 # - otherwise we shift
2606 rprec, rlevel = Productions[st_actionp[a].number].prec
2607 sprec, slevel = Precedence.get(a, ('right', 0))
2608 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2609 # We decide to shift here... highest precedence to shift
2610 Productions[st_actionp[a].number].reduced -= 1
2614 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2615 self.sr_conflicts.append((st, a, 'shift'))
2616 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2619 # Hmmm. Guess we'll keep the reduce
2620 if not slevel and not rlevel:
2621 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2622 self.sr_conflicts.append((st, a, 'reduce'))
2625 raise LALRError('Unknown conflict in state %d' % st)
2630 # Print the actions associated with each terminal
2632 for a, p, m in actlist:
2634 if p is st_actionp[a]:
2635 log.info(' %-15s %s', a, m)
2636 _actprint[(a, m)] = 1
2638 # Print the actions that were not used. (debugging)
2640 for a, p, m in actlist:
2642 if p is not st_actionp[a]:
2643 if (a, m) not in _actprint:
2644 log.debug(' ! %-15s [ %s ]', a, m)
2646 _actprint[(a, m)] = 1
2650 # Construct the goto table for this state
2655 if s in self.grammar.Nonterminals:
2658 g = self.lr0_goto(I, n)
2659 j = self.lr0_cidhash.get(id(g), -1)
2662 log.info(' %-30s shift and go to state %d', n, j)
2664 action[st] = st_action
2665 actionp[st] = st_actionp
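# The integer encoding used in st_action above is worth spelling out:
# a positive entry means "shift and go to that state", a negative entry
# means "reduce by that rule number", and 0 means "accept". A tiny
# decoder for a single table entry (illustrative helper, not PLY API):
def _describe_action(entry):
    if entry is None:
        return 'error'
    if entry > 0:
        return 'shift and go to state %d' % entry
    if entry < 0:
        return 'reduce using rule %d' % -entry
    return 'accept'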
2669 # -----------------------------------------------------------------------------
2670 # write_table()
2672 # This function writes the LR parsing tables to a file
2673 # -----------------------------------------------------------------------------
2675 def write_table(self, tabmodule, outputdir='', signature=''):
2676 if isinstance(tabmodule, types.ModuleType):
2677 raise IOError("Won't overwrite existing tabmodule")
2679 basemodulename = tabmodule.split('.')[-1]
2680 filename = os.path.join(outputdir, basemodulename) + '.py'
2682 f = open(filename, 'w')
2686 # This file is automatically generated. Do not edit.
2692 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2694 # Change smaller to 0 to go back to original tables
2697 # Factor out names to try and make the table smaller
2701 for s, nd in self.lr_action.items():
2702 for name, v in nd.items():
2710 f.write('\n_lr_action_items = {')
2711 for k, v in items.items():
2712 f.write('%r:([' % k)
2724 for _k, _v in _lr_action_items.items():
2725 for _x,_y in zip(_v[0],_v[1]):
2726 if _x not in _lr_action: _lr_action[_x] = {}
2727 _lr_action[_x][_k] = _y
2728 del _lr_action_items
2732 f.write('\n_lr_action = { ')
2733 for k, v in self.lr_action.items():
2734 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2738 # Factor out names to try and make the table smaller
2741 for s, nd in self.lr_goto.items():
2742 for name, v in nd.items():
2750 f.write('\n_lr_goto_items = {')
2751 for k, v in items.items():
2752 f.write('%r:([' % k)
2764 for _k, _v in _lr_goto_items.items():
2765 for _x, _y in zip(_v[0], _v[1]):
2766 if _x not in _lr_goto: _lr_goto[_x] = {}
2767 _lr_goto[_x][_k] = _y
2771 f.write('\n_lr_goto = { ')
2772 for k, v in self.lr_goto.items():
2773 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2776 # Write production table
2777 f.write('_lr_productions = [\n')
2778 for p in self.lr_productions:
2780 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2781 p.func, os.path.basename(p.file), p.line))
2783 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2787 except IOError as e:
2791 # -----------------------------------------------------------------------------
2792 # pickle_table()
2794 # This function pickles the LR parsing tables to the file named by filename
2795 # -----------------------------------------------------------------------------
2797 def pickle_table(self, filename, signature=''):
2799 import cPickle as pickle
2802 with open(filename, 'wb') as outf:
2803 pickle.dump(__tabversion__, outf, pickle_protocol)
2804 pickle.dump(self.lr_method, outf, pickle_protocol)
2805 pickle.dump(signature, outf, pickle_protocol)
2806 pickle.dump(self.lr_action, outf, pickle_protocol)
2807 pickle.dump(self.lr_goto, outf, pickle_protocol)
2810 for p in self.lr_productions:
2812 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2814 outp.append((str(p), p.name, p.len, None, None, None))
2815 pickle.dump(outp, outf, pickle_protocol)
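# Reading the tables back simply reverses the dump order above; PLY does
# this in read_pickle(), used by yacc() below. A minimal sketch of the
# counterpart (illustrative, with error handling omitted):
def _read_pickle_sketch(filename):
    import pickle
    with open(filename, 'rb') as inf:
        tabversion = pickle.load(inf)    # written from __tabversion__
        lr_method = pickle.load(inf)
        signature = pickle.load(inf)
        lr_action = pickle.load(inf)
        lr_goto = pickle.load(inf)
        productions = pickle.load(inf)
    return tabversion, lr_method, signature, lr_action, lr_goto, productions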
2817 # -----------------------------------------------------------------------------
2818 # === INTROSPECTION ===
2820 # The following functions and classes are used to implement the PLY
2821 # introspection features, followed by the yacc() function itself.
2822 # -----------------------------------------------------------------------------
2824 # -----------------------------------------------------------------------------
2825 # get_caller_module_dict()
2827 # This function returns a dictionary containing all of the symbols defined within
2828 # a caller further down the call stack. This is used to get the environment
2829 # associated with the yacc() call if none was provided.
2830 # -----------------------------------------------------------------------------
2832 def get_caller_module_dict(levels):
2833 f = sys._getframe(levels)
2834 ldict = f.f_globals.copy()
2835 if f.f_globals != f.f_locals:
2836 ldict.update(f.f_locals)
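# For example (illustrative), a library entry point can use this helper
# to pick up definitions from whoever called it. yacc() below does
# exactly this with levels=2: one level skips this helper's own frame,
# the other skips yacc()'s frame, landing in the user's module.
def _caller_env_demo():
    # Collect p_* names defined by the caller of this function.
    env = get_caller_module_dict(2)
    return sorted(k for k in env if k.startswith('p_'))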
2839 # -----------------------------------------------------------------------------
2840 # parse_grammar()
2842 # This takes a raw grammar rule string and parses it into production data
2843 # -----------------------------------------------------------------------------
2844 def parse_grammar(doc, file, line):
2846 # Split the doc string into lines
2847 pstrings = doc.splitlines()
2857 # This is a continuation of a previous rule
2859 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2867 if assign != ':' and assign != '::=':
2868 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2870 grammar.append((file, dline, prodname, syms))
2874 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
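# Example (the file name and line number below are made up): a docstring
# such as
#
#     expression : expression PLUS term
#                | term
#
# parses into one (file, line, prodname, syms) tuple per alternative:
def _parse_grammar_demo():
    doc = ('expression : expression PLUS term\n'
           '           | term')
    return parse_grammar(doc, 'calc.py', 12)
    # yields tuples of the form:
    # [('calc.py', 13, 'expression', ['expression', 'PLUS', 'term']),
    #  ('calc.py', 14, 'expression', ['term'])]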
2878 # -----------------------------------------------------------------------------
2879 # ParserReflect()
2881 # This class represents information extracted for building a parser including
2882 # start symbol, error function, tokens, precedence list, action functions, etc.
2884 # -----------------------------------------------------------------------------
2885 class ParserReflect(object):
2886 def __init__(self, pdict, log=None):
2889 self.error_func = None
2891 self.modules = set()
2896 self.log = PlyLogger(sys.stderr)
2900 # Get all of the basic information
2903 self.get_error_func()
2905 self.get_precedence()
2906 self.get_pfunctions()
2908 # Validate all of the information
2909 def validate_all(self):
2910 self.validate_start()
2911 self.validate_error_func()
2912 self.validate_tokens()
2913 self.validate_precedence()
2914 self.validate_pfunctions()
2915 self.validate_modules()
2918 # Compute a signature over the grammar
2919 def signature(self):
2921 from hashlib import md5
2927 sig.update(self.start.encode('latin-1'))
2929 sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1'))
2931 sig.update(' '.join(self.tokens).encode('latin-1'))
2932 for f in self.pfuncs:
2934 sig.update(f[3].encode('latin-1'))
2935 except (TypeError, ValueError):
2938 digest = base64.b16encode(sig.digest())
2939 if sys.version_info[0] >= 3:
2940 digest = digest.decode('latin-1')
2943 # -----------------------------------------------------------------------------
2944 # validate_modules()
2946 # This method checks to see if there are duplicated p_rulename() functions
2947 # in the parser module file. Without this function, it is really easy for
2948 # users to make mistakes by cutting and pasting code fragments (and it's a real
2949 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2950 # we just do a little regular expression pattern matching of def statements
2951 # to try and detect duplicates.
2952 # -----------------------------------------------------------------------------
2954 def validate_modules(self):
2955 # Match def p_funcname(
2956 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2958 for module in self.modules:
2959 lines, linen = inspect.getsourcelines(module)
2962 for linen, line in enumerate(lines):
2967 prev = counthash.get(name)
2969 counthash[name] = linen
2971 filename = inspect.getsourcefile(module)
2972 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
2973 filename, linen, name, prev)
2975 # Get the start symbol
2976 def get_start(self):
2977 self.start = self.pdict.get('start')
2979 # Validate the start symbol
2980 def validate_start(self):
2981 if self.start is not None:
2982 if not isinstance(self.start, string_types):
2983 self.log.error("'start' must be a string")
2985 # Look for error handler
2986 def get_error_func(self):
2987 self.error_func = self.pdict.get('p_error')
2989 # Validate the error function
2990 def validate_error_func(self):
2992 if isinstance(self.error_func, types.FunctionType):
2994 elif isinstance(self.error_func, types.MethodType):
2997 self.log.error("'p_error' defined, but is not a function or method")
3001 eline = self.error_func.__code__.co_firstlineno
3002 efile = self.error_func.__code__.co_filename
3003 module = inspect.getmodule(self.error_func)
3004 self.modules.add(module)
3006 argcount = self.error_func.__code__.co_argcount - ismethod
3008 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3011 # Get the tokens map
3012 def get_tokens(self):
3013 tokens = self.pdict.get('tokens')
3015 self.log.error('No token list is defined')
3019 if not isinstance(tokens, (list, tuple)):
3020 self.log.error('tokens must be a list or tuple')
3025 self.log.error('tokens is empty')
3029 self.tokens = tokens
3031 # Validate the tokens
3032 def validate_tokens(self):
3033 # Validate the tokens.
3034 if 'error' in self.tokens:
3035 self.log.error("Illegal token name 'error'. It is a reserved word")
3040 for n in self.tokens:
3042 self.log.warning('Token %r multiply defined', n)
3045 # Get the precedence map (if any)
3046 def get_precedence(self):
3047 self.prec = self.pdict.get('precedence')
3049 # Validate and parse the precedence map
3050 def validate_precedence(self):
3053 if not isinstance(self.prec, (list, tuple)):
3054 self.log.error('precedence must be a list or tuple')
3057 for level, p in enumerate(self.prec):
3058 if not isinstance(p, (list, tuple)):
3059 self.log.error('Bad precedence table')
3064 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3068 if not isinstance(assoc, string_types):
3069 self.log.error('precedence associativity must be a string')
3073 if not isinstance(term, string_types):
3074 self.log.error('precedence items must be strings')
3077 preclist.append((term, assoc, level+1))
3078 self.preclist = preclist
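# For example (hypothetical table), given
#
#     precedence = (('left',  'PLUS',  'MINUS'),
#                   ('left',  'TIMES', 'DIVIDE'),
#                   ('right', 'UMINUS'))
#
# the loop above produces preclist entries of the form
#
#     [('PLUS', 'left', 1), ('MINUS', 'left', 1),
#      ('TIMES', 'left', 2), ('DIVIDE', 'left', 2),
#      ('UMINUS', 'right', 3)]
#
# where a higher level number means tighter binding.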
3080 # Get all p_functions from the grammar
3081 def get_pfunctions(self):
3083 for name, item in self.pdict.items():
3084 if not name.startswith('p_') or name == 'p_error':
3086 if isinstance(item, (types.FunctionType, types.MethodType)):
3087 line = item.__code__.co_firstlineno
3088 module = inspect.getmodule(item)
3089 p_functions.append((line, module, name, item.__doc__))
3091 # Sort all of the actions by line number; make sure to stringify
3092 # modules to make them sortable, since `line` may not uniquely sort all of them
3094 p_functions.sort(key=lambda p_function: (
3099 self.pfuncs = p_functions
3101 # Validate all of the p_functions
3102 def validate_pfunctions(self):
3104 # Check that at least one grammar rule was defined
3105 if len(self.pfuncs) == 0:
3106 self.log.error('no rules of the form p_rulename are defined')
3110 for line, module, name, doc in self.pfuncs:
3111 file = inspect.getsourcefile(module)
3112 func = self.pdict[name]
3113 if isinstance(func, types.MethodType):
3117 if func.__code__.co_argcount > reqargs:
3118 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3120 elif func.__code__.co_argcount < reqargs:
3121 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3123 elif not func.__doc__:
3124 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3125 file, line, func.__name__)
3128 parsed_g = parse_grammar(doc, file, line)
3129 for g in parsed_g:
3130     grammar.append((name, g))
3131 except SyntaxError as e:
3132 self.log.error(str(e))
3135 # Looks like a valid grammar rule
3136 # Mark the module in which it is defined
3137 self.modules.add(module)
3139 # Secondary validation step that looks for p_ definitions that are not functions
3140 # or functions that look like they might be grammar rules.
3142 for n, v in self.pdict.items():
3143 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3145 if n.startswith('t_'):
3147 if n.startswith('p_') and n != 'p_error':
3148 self.log.warning('%r not defined as a function', n)
3149 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3150 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3153 doc = v.__doc__.split(' ')
3155 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3156 v.__code__.co_filename, v.__code__.co_firstlineno, n)
3160 self.grammar = grammar
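# For reference, the shape that validate_pfunctions() accepts: a p_*
# function whose docstring carries the BNF. The rule below is the
# canonical illustrative example, not part of this module:
def _pfunction_shape_demo():
    def p_expression_plus(p):
        'expression : expression PLUS term'
        p[0] = p[1] + p[3]
    # One positional argument, a p_ name, and a docstring holding the
    # grammar rule: this passes every check above.
    return p_expression_plus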
3162 # -----------------------------------------------------------------------------
3163 # yacc()
3165 # Build a parser
3166 # -----------------------------------------------------------------------------
3168 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3169 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3170 outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3172 if tabmodule is None:
3173 tabmodule = tab_module
3175 # Reference to the parsing method of the last built parser
3178 # If pickling is enabled, table files are not created
3182 if errorlog is None:
3183 errorlog = PlyLogger(sys.stderr)
3185 # Get the module dictionary used for the parser
3187 _items = [(k, getattr(module, k)) for k in dir(module)]
3188 pdict = dict(_items)
3189 # If no __file__ attribute is available, try to obtain it from the __module__ instead
3190 if '__file__' not in pdict:
3191 pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3193 pdict = get_caller_module_dict(2)
3195 if outputdir is None:
3196 # If no output directory is set, the location of the output files
3197 # is determined according to the following rules:
3198 # - If tabmodule specifies a package, files go into that package directory
3199 # - Otherwise, files go in the same directory as the specifying module
3200 if isinstance(tabmodule, types.ModuleType):
3201 srcfile = tabmodule.__file__
3203 if '.' not in tabmodule:
3204 srcfile = pdict['__file__']
3206 parts = tabmodule.split('.')
3207 pkgname = '.'.join(parts[:-1])
3208 exec('import %s' % pkgname)
3209 srcfile = getattr(sys.modules[pkgname], '__file__', '')
3210 outputdir = os.path.dirname(srcfile)
3212 # Determine whether the module providing the parser is part of a package.
3213 # If so, fix the tabmodule setting so that tables load correctly
3214 pkg = pdict.get('__package__')
3215 if pkg and isinstance(tabmodule, str):
3216 if '.' not in tabmodule:
3217 tabmodule = pkg + '.' + tabmodule
3221 # Set start symbol if it's specified directly using an argument
3222 if start is not None:
3223 pdict['start'] = start
3225 # Collect parser information from the dictionary
3226 pinfo = ParserReflect(pdict, log=errorlog)
3230 raise YaccError('Unable to build parser')
3232 # Check signature against table files (if any)
3233 signature = pinfo.signature()
3239 read_signature = lr.read_pickle(picklefile)
3241 read_signature = lr.read_table(tabmodule)
3242 if optimize or (read_signature == signature):
3244 lr.bind_callables(pinfo.pdict)
3245 parser = LRParser(lr, pinfo.error_func)
3246 parse = parser.parse
3248 except Exception as e:
3249 errorlog.warning('There was a problem loading the table file: %r', e)
3250 except VersionError as e:
3251 errorlog.warning(str(e))
3255 if debuglog is None:
3258 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3259 except IOError as e:
3260 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3261 debuglog = NullLogger()
3263 debuglog = NullLogger()
3265 debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
3269 # Validate the parser information
3270 if pinfo.validate_all():
3271 raise YaccError('Unable to build parser')
3273 if not pinfo.error_func:
3274 errorlog.warning('no p_error() function is defined')
3276 # Create a grammar object
3277 grammar = Grammar(pinfo.tokens)
3279 # Set precedence level for terminals
3280 for term, assoc, level in pinfo.preclist:
3282 grammar.set_precedence(term, assoc, level)
3283 except GrammarError as e:
3284 errorlog.warning('%s', e)
3286 # Add productions to the grammar
3287 for funcname, gram in pinfo.grammar:
3288 file, line, prodname, syms = gram
3290 grammar.add_production(prodname, syms, funcname, file, line)
3291 except GrammarError as e:
3292 errorlog.error('%s', e)
3295 # Set the grammar start symbol
3298 grammar.set_start(pinfo.start)
3300 grammar.set_start(start)
3301 except GrammarError as e:
3302 errorlog.error(str(e))
3306 raise YaccError('Unable to build parser')
3308 # Verify the grammar structure
3309 undefined_symbols = grammar.undefined_symbols()
3310 for sym, prod in undefined_symbols:
3311 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3314 unused_terminals = grammar.unused_terminals()
3315 if unused_terminals:
3317 debuglog.info('Unused terminals:')
3319 for term in unused_terminals:
3320 errorlog.warning('Token %r defined, but not used', term)
3321 debuglog.info(' %s', term)
3323 # Print out all productions to the debug log
3326 debuglog.info('Grammar')
3328 for n, p in enumerate(grammar.Productions):
3329 debuglog.info('Rule %-5d %s', n, p)
3331 # Find unused non-terminals
3332 unused_rules = grammar.unused_rules()
3333 for prod in unused_rules:
3334 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3336 if len(unused_terminals) == 1:
3337 errorlog.warning('There is 1 unused token')
3338 if len(unused_terminals) > 1:
3339 errorlog.warning('There are %d unused tokens', len(unused_terminals))
3341 if len(unused_rules) == 1:
3342 errorlog.warning('There is 1 unused rule')
3343 if len(unused_rules) > 1:
3344 errorlog.warning('There are %d unused rules', len(unused_rules))
3348 debuglog.info('Terminals, with rules where they appear')
3350 terms = list(grammar.Terminals)
3353 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3356 debuglog.info('Nonterminals, with rules where they appear')
3358 nonterms = list(grammar.Nonterminals)
3360 for nonterm in nonterms:
3361 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3365 unreachable = grammar.find_unreachable()
3366 for u in unreachable:
3367 errorlog.warning('Symbol %r is unreachable', u)
3369 infinite = grammar.infinite_cycles()
3370 for inf in infinite:
3371 errorlog.error('Infinite recursion detected for symbol %r', inf)
3374 unused_prec = grammar.unused_precedence()
3375 for term, assoc in unused_prec:
3376 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3380 raise YaccError('Unable to build parser')
3382 # Run the LRGeneratedTable on the grammar
3384 errorlog.debug('Generating %s tables', method)
3386 lr = LRGeneratedTable(grammar, method, debuglog)
3389 num_sr = len(lr.sr_conflicts)
3391 # Report shift/reduce and reduce/reduce conflicts
3393 errorlog.warning('1 shift/reduce conflict')
3395 errorlog.warning('%d shift/reduce conflicts', num_sr)
3397 num_rr = len(lr.rr_conflicts)
3399 errorlog.warning('1 reduce/reduce conflict')
3401 errorlog.warning('%d reduce/reduce conflicts', num_rr)
3403 # Write out conflicts to the output file
3404 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3405 debuglog.warning('')
3406 debuglog.warning('Conflicts:')
3407 debuglog.warning('')
3409 for state, tok, resolution in lr.sr_conflicts:
3410 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
3412 already_reported = set()
3413 for state, rule, rejected in lr.rr_conflicts:
3414 if (state, id(rule), id(rejected)) in already_reported:
3416 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3417 debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3418 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3419 errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3420 already_reported.add((state, id(rule), id(rejected)))
3423 for state, rule, rejected in lr.rr_conflicts:
3424 if not rejected.reduced and (rejected not in warned_never):
3425 debuglog.warning('Rule (%s) is never reduced', rejected)
3426 errorlog.warning('Rule (%s) is never reduced', rejected)
3427 warned_never.append(rejected)
3429 # Write the table file if requested
3432 lr.write_table(tabmodule, outputdir, signature)
3433 except IOError as e:
3434 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3436 # Write a pickled version of the tables
3439 lr.pickle_table(picklefile, signature)
3440 except IOError as e:
3441 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3444 lr.bind_callables(pinfo.pdict)
3445 parser = LRParser(lr, pinfo.error_func)
3447 parse = parser.parse
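# Typical usage from a grammar module (illustrative):
#
#     import ply.yacc as yacc
#     parser = yacc.yacc()          # build tables from this module's
#                                   # tokens, precedence and p_* rules
#     result = parser.parse(data, lexer=mylexer)
#
# where data is the input string and mylexer is a token source such as a
# lexer built with ply.lex.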