1 #-----------------------------------------------------------------------------
4 # Author(s): David M. Beazley (dave@dabeaz.com)
6 # Copyright (C) 2001-2006, David M. Beazley
8 # $Header: /cvs/projects/PLY/yacc.py,v 1.6 2004/05/26 20:51:34 beazley Exp $
10 # This library is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU Lesser General Public
12 # License as published by the Free Software Foundation; either
13 # version 2.1 of the License, or (at your option) any later version.
15 # This library is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 # Lesser General Public License for more details.
20 # You should have received a copy of the GNU Lesser General Public
21 # License along with this library; if not, write to the Free Software
22 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 # See the file COPYING for a complete copy of the LGPL.
27 # This implements an LR parser that is constructed from grammar rules defined
28 # as Python functions. Roughly speaking, this module is a cross between
29 # John Aycock's Spark system and the GNU bison utility.
31 # The current implementation is only somewhat object-oriented. The
32 # LR parser itself is defined in terms of an object (which allows multiple
33 # parsers to co-exist). However, most of the variables used during table
34 # construction are defined in terms of global variables. Users shouldn't
35 # notice unless they are trying to define multiple parsers at the same
36 # time using threads (in which case they should have their head examined).
38 # This implementation supports both SLR and LALR(1) parsing. LALR(1)
39 # support was implemented by Elias Ioup (ezioup@alumni.uchicago.edu)
# and hacked a bit by Dave to run faster.
42 # :::::::: WARNING :::::::
44 # Construction of LR parsing tables is fairly complicated and expensive.
45 # To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what some might
# consider to be good Python "coding style."   Modify the code at your
49 # ----------------------------------------------------------------------------
55 #-----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = 1                      # Debugging mode.  If set, yacc generates a
                                   # 'parser.out' file in the current directory

debug_file = 'parser.out'          # Default name of the debugging file
tab_module = 'parsetab'            # Default name of the table module
default_lr = 'SLR'                 # Default LR table generation method

error_count = 3                    # Number of symbols that must be shifted to leave recovery mode
70 import re, types, sys, cStringIO, md5, os.path
# Exception raised for yacc-related errors
class YaccError(Exception):
    """Signals an error detected while constructing or running the parser."""
    pass
75 #-----------------------------------------------------------------------------
76 # === LR Parsing Engine ===
78 # The following classes are used for the LR parser itself. These are not
79 # used during table construction and are independent of the actual LR
80 # table generation algorithm
81 #-----------------------------------------------------------------------------
# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)

class YaccSymbol:
    """Grammar symbol held on the parsing stack.

    Attributes are assigned dynamically by the parser:
      type      -- grammar symbol type name
      value     -- semantic value of the symbol
      lineno    -- starting line number (optional)
      endlineno -- ending line number (optional)
    """
    # The `class` statement was elided from the listing; restored here.
    def __str__(self):
        # A symbol prints as its grammar type name.
        return self.type

    def __repr__(self):
        return str(self)
# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.

class YaccProduction:
    """Sequence-like view of the symbols involved in a reduction.

    Index 0 is the left-hand-side symbol; indexes 1..N are the
    right-hand-side symbols.  Reading/writing an index reads/writes the
    .value attribute of the underlying YaccSymbol.
    """

    def __init__(self, s):
        self.slice = s            # List of YaccSymbol objects
        self.pbstack = []         # Tokens returned to the input by pushback()

    def __getitem__(self, n):
        # Integer index -> a single semantic value; slice -> list of values.
        # (isinstance(n, int) replaces the Python-2-only types.IntType check.)
        if isinstance(n, int):
            return self.slice[n].value
        else:
            return [s.value for s in self.slice[n.start:n.stop:n.step]]

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        # Line number of item n, or 0 if the underlying symbol has none.
        return getattr(self.slice[n], "lineno", 0)

    def linespan(self, n):
        # (startline, endline) range covered by item n.  endlineno falls
        # back to the start line when not set.
        startline = getattr(self.slice[n], "lineno", 0)
        endline = getattr(self.slice[n], "endlineno", startline)
        return startline, endline

    def pushback(self, n):
        # Return the last n right-hand-side tokens to the input stream.
        # Raises ValueError for non-positive n or n larger than the RHS.
        if n <= 0:
            raise ValueError("Expected a positive value")
        if n > (len(self.slice) - 1):
            raise ValueError("Can't push %d tokens. Only %d are available." % (n, len(self.slice) - 1))
        for i in range(0, n):
            self.pbstack.append(self.slice[-i - 1])
# The LR Parsing engine.  This is defined as a class so that multiple parsers
# can exist in the same process.  A user never instantiates this directly.
# Instead, the global yacc() function should be used to create a suitable Parser
# object.
# NOTE(review): several source lines of this class -- including the `class`
# statement itself -- are elided from this listing; indentation below is a
# best-effort reconstruction.  Verify against the full file.

    def __init__(self,magic=None):

        # This is a hack to keep users from trying to instantiate a Parser
        # object directly.  (The guarding `if magic != ...:` line is elided.)
            raise YaccError, "Can't instantiate Parser. Use yacc() instead."

        # Reset internal state
        self.productions = None          # List of productions
        self.errorfunc = None            # Error handling function
        self.action = { }                # LR Action table
        self.goto = { }                  # LR goto table
        self.require = { }               # Attribute require table
        self.method = "Unknown LR"       # Table construction method used

        # NOTE(review): the lines below belong to the errok()/restart()
        # methods, whose `def` lines are elided from this listing.
        del self.statestack[:]
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0):
        # Run the LR parsing engine over the token stream produced by
        # `lexer` (or the default lex module), returning the semantic value
        # of the start symbol.  debug > 0 enables trace output.
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table
        goto = self.goto                 # Local reference to goto table
        prod = self.productions          # Local reference to production list
        pslice = YaccProduction(None)    # Production object passed to grammar rules
        pslice.parser = self             # Parser object
        self.errorcount = 0              # Used during error recovery

        # If no lexer was given, we will try to use the lex module

        # If input was supplied, pass to lexer
        get_token = lexer.token

        statestack = [ ]                 # Stack of parsing states
        self.statestack = statestack
        symstack = [ ]                   # Stack of grammar symbols
        self.symstack = symstack

        errtoken = None                  # Err token

        # The start state is assumed to be (0,$)
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer
                print 'state', statestack[-1]
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                    lookahead = lookaheadstack.pop()
                    lookahead = YaccSymbol()
                # Debug trace: stack contents, a dot, then the lookahead.
                errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()

            # Check the action table
            ltype = lookahead.type
            t = actions.get((s,ltype),None)
                # Fall back to the default ('#') action for this state.
                t = actions.get((s,'#'),None)
                    print 'default action', t

                # shift a symbol on the stack
                    # Error, end of input
                    sys.stderr.write("yacc: Parse error. EOF\n")
                        sys.stderr.write("%-60s shift state %s\n" % (errorlead, t))
                symstack.append(lookahead)

                # Decrease error count on successful shift
                if self.errorcount > 0:

                # reduce a symbol on the stack, emit a production
                    # Get production function
                    sym.type = pname       # Production name
                        sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t))
                    targ = symstack[-plen-1:]
                        # Propagate line-number info from the RHS symbols.
                            sym.lineno = targ[1].lineno
                            sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
                        except AttributeError:
                    del statestack[-plen:]

                    # Call the grammar rule with our special slice object

                    # If there was a pushback, put that on the stack
                        lookaheadstack.append(lookahead)
                        for _t in pslice.pbstack:
                            lookaheadstack.append(_t)

                    statestack.append(goto[statestack[-1],pname])

                    # Accept: return the start symbol's semantic value.
                    return getattr(n,"value",None)
                    # NOTE(review): the next line is unreachable after the
                    # return, and sys.stderr.write() takes a single argument
                    # -- this looks like leftover debug code.
                    sys.stderr.write(errorlead, "\n")

                    sys.stderr.write(errorlead + "\n")
                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called
                # when errorcount == 0.
                if not self.errorcount:
                    self.errorcount = error_count
                    if errtoken.type == '$':
                        errtoken = None               # End of file!
                        global errok,token,restart
                        errok = self.errok       # Set some special functions available in error recovery
                        restart = self.restart
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions
                        if not self.errorcount:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        sys.stderr.write("yacc: Parse error in input. EOF\n")
                    self.errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$':
                    # Nuke the pushback stack
                    del lookaheadstack[:]

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$':
                    # Whoa. We're really hosed here. Bail out

                if lookahead.type != 'error':
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    lookaheadstack.append(lookahead)

        # Should never get here: every action-table outcome is handled above.
        # Call an error function here
        raise RuntimeError, "yacc: internal parser error!!!\n"
388 # -----------------------------------------------------------------------------
389 # === Parser Construction ===
391 # The following functions and variables are used to implement the yacc() function
392 # itself. This is pretty hairy stuff involving lots of error checking,
393 # construction of LR items, kernels, and so forth. Although a lot of
394 # this work is done using global variables, the resulting Parser object
395 # is completely self contained--meaning that it is safe to repeatedly
396 # call yacc() with different grammars in the same application.
397 # -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# validate_file()
#
# This function checks to see if there are duplicated p_rulename() functions
# in the parser module file.  Without this function, it is really easy for
# users to make mistakes by cutting and pasting code fragments (and it's a real
# bugger to try and figure out why the resulting parser doesn't work).  Therefore,
# we just do a little regular expression pattern matching of def statements
# to try and detect duplicates.
# -----------------------------------------------------------------------------

def validate_file(filename):
    """Scan `filename` for duplicated p_rulename() function definitions.

    Returns 1 if the file looks okay (or can't be checked at all),
    0 if a duplicated grammar-rule function was found.  A message is
    written to stderr for each duplicate.
    """
    base, ext = os.path.splitext(filename)
    if ext != '.py':
        return 1          # No idea what it is. Assume it's okay.

    try:
        f = open(filename)
        lines = f.readlines()
        f.close()
    except IOError:
        return 1          # Couldn't read the file; don't worry about it.

    # Match def p_funcname(
    fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

    counthash = { }       # rule-function name -> line where first defined
    linen = 1
    noerror = 1
    for l in lines:
        m = fre.match(l)
        if m:
            name = m.group(1)
            prev = counthash.get(name)
            if not prev:
                counthash[name] = linen
            else:
                sys.stderr.write("%s:%d: Function %s redefined. Previously defined on line %d\n" % (filename,linen,name,prev))
                noerror = 0
        linen += 1
    return noerror
# This function looks for functions that might be grammar rules, but which don't have the proper p_suffix.
def validate_dict(d):
    # Scan a module dictionary: warn about p_* entries that are not
    # functions, and about one-argument functions whose docstring looks
    # like a grammar rule but that lack the p_ prefix.
    # NOTE(review): several lines of this function are elided from this
    # listing; indentation below is reconstructed.
    for n,v in d.items():
        if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue
        if n[0:2] == 't_': continue

            sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n)
        if 1 and isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
                # Heuristic: docstring of the form "name : ..." suggests a
                # mis-named grammar rule.
                doc = v.__doc__.split(" ")
                sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.func_code.co_filename, v.func_code.co_firstlineno,n))
            except StandardError:
# -----------------------------------------------------------------------------
#                           === GRAMMAR FUNCTIONS ===
#
# The following global variables and functions are used to store, manipulate,
# and verify the grammar rules specified by the user.
# -----------------------------------------------------------------------------

# Initialize all of the global variables used during grammar construction
def initialize_vars():
    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires
    global Prodempty, TReductions, NTReductions, GotoSetNum, Canonical

    Productions = [None]   # A list of all of the productions.  The first
                           # entry is always reserved for the purpose of
                           # building an augmented grammar

    Prodnames = { }        # A dictionary mapping the names of nonterminals to a list of all
                           # productions of that nonterminal.

    Prodmap = { }          # A dictionary that is only used to detect duplicate
                           # productions.

    Terminals = { }        # A dictionary mapping the names of terminal symbols to a
                           # list of the rules where they are used.

    Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                           # of rule numbers where they are used.

    First = { }            # A dictionary of precomputed FIRST(x) symbols

    Follow = { }           # A dictionary of precomputed FOLLOW(x) symbols

    Precedence = { }       # Precedence rules for each terminal. Contains tuples of the
                           # form ('right',level) or ('nonassoc', level) or ('left',level)

    LRitems = [ ]          # A list of all LR items for the grammar.  These are the
                           # productions with the "dot" like E -> E . PLUS E

    Errorfunc = None       # User defined error handler

    Signature = md5.new()  # Digital signature of the grammar rules, precedence
                           # and other information.  Used to determine when a
                           # parsing table needs to be regenerated.

    Requires = { }         # Requires list

    # LALR(1) Initialization
    Prodempty = { }        # A dictionary of all productions that have an empty rule
                           # of the form P : <empty>

    TReductions = { }      # A dictionary of precomputed reductions from
                           # nonterminals to terminals

    NTReductions = { }     # A dictionary of precomputed reductions from
                           # nonterminals to nonterminals

    GotoSetNum = { }       # A dictionary that remembers goto sets based on
                           # the state number and symbol

    Canonical = { }        # A list of LR item sets. A LR item set is a list of LR
                           # items that represent the state of the parser

    # File objects used when creating the parser.out debugging file
    # NOTE(review): the `global _vf, _vfc` declaration line is elided here.
    _vf = cStringIO.StringIO()
    _vfc = cStringIO.StringIO()
# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# It has a few required attributes:
#
#        name     - Name of the production (nonterminal)
#        prod     - A list of symbols making up its production
#        number   - Production number.
#
# In addition, a few additional attributes are used to help with debugging or
# optimization of table generation.
#
#        file     - File where production action is defined.
#        lineno   - Line number where action is defined
#        func     - Action function
#        prec     - Precedence level
#        lr_next  - Next LR item. Example, if we are ' E -> E . PLUS E'
#                   then lr_next refers to 'E -> E PLUS . E'
#        lr_index - LR item index (location of the ".") in the prod list.
#        lookaheads - LALR lookahead symbols for this item
#        len      - Length of the production (number of symbols on right hand side)
# -----------------------------------------------------------------------------
# NOTE(review): the `class Production:` statement and several method bodies
# are elided from this listing; indentation below is reconstructed.

    def __init__(self,**kw):
        # Copy keyword arguments onto the instance (the loop body --
        # presumably setattr(self,k,v) -- is elided from this listing).
        for k,v in kw.items():

        # Bookkeeping used by the table generator:
        self.lr0_added = 0    # Flag indicating whether or not added to LR0 closure
        self.lr1_added = 0    # Flag indicating whether or not added to LR1
        self.lookaheads = { }
        self.setnumbers = [ ]

        # __str__ body (its `def` line is elided): render the rule as
        # "name -> sym1 sym2 ..." or "name -> <empty>".
        s = "%s -> %s" % (self.name," ".join(self.prod))
            s = "%s -> <empty>" % self.name

    # Compute lr_items from the production
        # lr_item body (its `def` line is elided): build the LR item with
        # the dot at position n, or None when n is past the end.
        if n > len(self.prod): return None
        p.prod = list(self.prod)
        p.number = self.number
        p.setnumbers = self.setnumbers
        p.prod = tuple(p.prod)

        # Precompute list of productions immediately following
            p.lrafter = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError),e:
            p.lrbefore = p.prod[n-1]
# Lightweight stand-in for Production objects (no behavior of its own;
# attributes are assigned externally).  The original one-line body was
# elided from this listing and is restored here.
class MiniProduction:
    pass
# Utility: report whether a rule/symbol name is identifier-like.
def is_identifier(s):
    """Return 1 if `s` contains only alphanumerics/underscores, else 0.

    Note: an empty string yields 1 (vacuously valid); callers pass
    candidate grammar-symbol names.  (The loop/return lines elided from
    the listing are restored here.)
    """
    for c in s:
        if not (c.isalnum() or c == '_'): return 0
    return 1
# -----------------------------------------------------------------------------
# add_production()
#
# Given an action function, this function assembles a production rule.
# The production rule is assumed to be found in the function's docstring.
# This rule has the general syntax:
#
#        name1 ::= production1
#        name2 ::= production1
# -----------------------------------------------------------------------------
# NOTE(review): many lines of this function are elided from this listing;
# indentation below is reconstructed.

def add_production(f,file,line,prodname,syms):
    # Reject rule names that collide with tokens or reserved words.
    if Terminals.has_key(prodname):
        sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname))
    if prodname == 'error':
        sys.stderr.write("%s:%d: Illegal rule name '%s'. error is a reserved word.\n" % (file,line,prodname))
    if not is_identifier(prodname):
        sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname))

        # Validate every right-hand-side symbol name ('%prec' is allowed).
        if not is_identifier(s) and s != '%prec':
            sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname))

    # See if the rule is already in the rulemap
    map = "%s -> %s" % (prodname,syms)
    if Prodmap.has_key(map):
        sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m))
        sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line))

    # Number and register the new production (construction lines elided).
    p.number = len(Productions)
    Productions.append(p)
    if not Nonterminals.has_key(prodname):
        Nonterminals[prodname] = [ ]

    # Add all terminals to Terminals
    while i < len(p.prod):
            # Handle a %prec directive embedded in the rule body.
            precname = p.prod[i+1]
                sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line))

            prec = Precedence.get(precname,None)
                sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname))

            if Terminals.has_key(t):
                Terminals[t].append(p.number)
                # Is a terminal. We'll assign a precedence to p based on this
                if not hasattr(p,"prec"):
                    p.prec = Precedence.get(t,('right',0))
                if not Nonterminals.has_key(t):
                    Nonterminals[t] = [ ]
                Nonterminals[t].append(p.number)

    if not hasattr(p,"prec"):

    # Set final length of productions
    p.prod = tuple(p.prod)

    # Calculate unique syms in the production

    # Add to the global productions list
        Prodnames[p.name].append(p)
        Prodnames[p.name] = [ p ]
# Given a raw rule function, this function rips out its doc string
# and adds rules to the grammar
# NOTE(review): the enclosing `def` line and many statements are elided
# from this listing; indentation below is reconstructed.
    line = f.func_code.co_firstlineno
    file = f.func_code.co_filename

    # Bound methods carry an implicit first argument.
    if isinstance(f,types.MethodType):

    if f.func_code.co_argcount > reqdargs:
        sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__))

    if f.func_code.co_argcount < reqdargs:
        sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__))

        # Split the doc string into lines
        pstrings = f.__doc__.splitlines()

                    # This is a continuation of a previous rule
                        sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline))

                if assign != ':' and assign != '::=':
                    sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline))

                e = add_production(f,file,dline,prodname,syms)
            except StandardError:
                sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps))
        sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__))
# Cycle checking code (Michael Dyck)
# NOTE(review): several lines of these two functions are elided from this
# listing; indentation below is reconstructed.

def compute_reachable():
    '''
    Find each symbol that can be reached from the start symbol.
    Print a warning for any nonterminals that can't be reached.
    (Unused terminals have already had their warning.)
    '''
    # Mark every symbol, then flood-fill from the start symbol.
    for s in Terminals.keys() + Nonterminals.keys():

    mark_reachable_from( Productions[0].prod[0], Reachable )

    for s in Nonterminals.keys():
            sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s)

def mark_reachable_from(s, Reachable):
    '''
    Mark all symbols that are reachable from symbol s.
    '''
        # We've already reached symbol s.
    # Recurse through every production of s.
    for p in Prodnames.get(s,[]):
            mark_reachable_from(r, Reachable)
# -----------------------------------------------------------------------------
# compute_terminates()
#
# This function looks at the various parsing rules and tries to detect
# infinite recursion cycles (grammar rules where there is no possible way
# to derive a string of only terminals).
# -----------------------------------------------------------------------------
def compute_terminates():
    '''
    Raise an error for any symbols that don't terminate.
    '''
    # NOTE(review): several lines are elided from this listing.
    # Terminals trivially terminate:
    for t in Terminals.keys():

    # Initialize to false:
    for n in Nonterminals.keys():

    # Then propagate termination until no change:
        for (n,pl) in Prodnames.items():
            # Nonterminal n terminates iff any of its productions terminates.
                # Production p terminates iff all of its rhs symbols terminate.
                    if not Terminates[s]:
                        # The symbol s does not terminate,
                        # so production p does not terminate.

                    # didn't break from the loop,
                    # so every symbol s terminates
                    # so production p terminates.

                    # symbol n terminates!
                    if not Terminates[n]:
                        # Don't need to consider any more productions for this n.

    for (s,terminates) in Terminates.items():
            if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
                # s is used-but-not-defined, and we've already warned of that,
                # so it would be overkill to say that it's also non-terminating.
                sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s)
# -----------------------------------------------------------------------------
# verify_productions()
#
# This function examines all of the supplied rules to see if they seem valid.
# -----------------------------------------------------------------------------
def verify_productions(cycle_check=1):
    # NOTE(review): several lines are elided from this listing.
    # Pass 1: every symbol used in a rule must be defined somewhere.
    for p in Productions:
            if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
                sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s))

    # Now verify all of the tokens
        _vf.write("Unused terminals:\n\n")
    for s,v in Terminals.items():
        if s != 'error' and not v:
            sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s)
            if yaccdebug: _vf.write("   %s\n"% s)

    # Print out all of the productions
        _vf.write("\nGrammar\n\n")
        for i in range(1,len(Productions)):
            _vf.write("Rule %-5d %s\n" % (i, Productions[i]))

    # Verify the use of all productions
    for s,v in Nonterminals.items():
            sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s))

        sys.stderr.write("yacc: Warning. There is 1 unused token.\n")
        sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok)

        sys.stderr.write("yacc: Warning. There is 1 unused rule.\n")
        sys.stderr.write("yacc: Warning. There are %d unused rules.\n" % unused_prod)

    # Dump terminal/nonterminal cross-reference tables to the debug file.
        _vf.write("\nTerminals, with rules where they appear\n\n")
        ks = Terminals.keys()
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
        _vf.write("\nNonterminals, with rules where they appear\n\n")
        ks = Nonterminals.keys()
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))

        # Optionally run the (more expensive) termination analysis.
        error += compute_terminates()
#        error += check_cycles()
# -----------------------------------------------------------------------------
# This function walks the list of productions and builds a complete set of the
# LR items.  The LR items are stored in two ways:  First, they are uniquely
# numbered and placed in the list _lritems.  Second, a linked list of LR items
# is built for each production.  For example:
#
#   [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
# -----------------------------------------------------------------------------
# NOTE(review): the enclosing `def` line is elided from this listing.
    for p in Productions:
            # Chain each successive LR item to the previous one and number it.
            lastlri.lr_next = lri
            lri.lr_num = len(LRitems)

    # In order for the rest of the parser generator to work, we need to
    # guarantee that no more lritems are generated.  Therefore, we nuke
    # the p.lr_item method.  (Only used in debugging)
    # Production.lr_item = None
# -----------------------------------------------------------------------------
# add_precedence()
#
# Given a list of precedence rules, add to the precedence table.
# -----------------------------------------------------------------------------

def add_precedence(plist):
    # NOTE(review): the loop/unpacking lines are elided from this listing.
    # Each accepted associativity must be 'left', 'right', or 'nonassoc'.
        if prec != 'left' and prec != 'right' and prec != 'nonassoc':
            sys.stderr.write("yacc: Invalid precedence '%s'\n" % prec)
            if Precedence.has_key(t):
                sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % t)
            Precedence[t] = (prec,plevel)
            sys.stderr.write("yacc: Invalid precedence table.\n")
# -----------------------------------------------------------------------------
# augment_grammar()
#
# Compute the augmented grammar.  This is just a rule S' -> start where start
# is the starting symbol.
# -----------------------------------------------------------------------------

def augment_grammar(start=None):
    # Default the start symbol to the first user-defined production.
    # (NOTE(review): the guarding `if not start:` line is elided.)
        start = Productions[1].name
    # Install the augmenting rule S' -> start as production 0.
    Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None)
    Productions[0].usyms = [ start ]
    Nonterminals[start].append(0)
# -------------------------------------------------------------------------
# first()
#
# Compute the value of FIRST1(beta) where beta is a tuple of symbols.
#
# During execution of compute_first1, the result may be incomplete.
# Afterward (e.g., when called from compute_follow()), it will be complete.
# -------------------------------------------------------------------------
# NOTE(review): the enclosing `def first(beta):` line and several
# statements are elided from this listing.
    # We are computing First(x1,x2,x3,...,xn)
        x_produces_empty = 0

        # Add all the non-<empty> symbols of First[x] to the result.
                x_produces_empty = 1
                if f not in result: result.append(f)

        if x_produces_empty:
            # We have to consider the next x in beta,
            # i.e. stay in the loop.
            # We don't have to consider any further symbols in beta.

        # There was no 'break' from the loop,
        # so x_produces_empty was true for all x in beta,
        # so beta produces empty as well.
        result.append('<empty>')
# -------------------------------------------------------------------------
# compute_follow()
#
# Given a non-terminal.  This function computes the set of all symbols
# that might follow it.  Dragon book, p. 189.
# -------------------------------------------------------------------------
def compute_follow(start=None):
    # NOTE(review): several lines are elided from this listing.
    # Add '$' to the follow list of the start symbol
    for k in Nonterminals.keys():

        start = Productions[1].name

    Follow[start] = [ '$' ]

    # Fixed-point iteration: repeat until no FOLLOW set changes.
        for p in Productions[1:]:
            # Here is the production set
            for i in range(len(p.prod)):
                if Nonterminals.has_key(B):
                    # Okay. We got a non-terminal in a production
                    fst = first(p.prod[i+1:])
                        if f != '<empty>' and f not in Follow[B]:
                    if hasempty or i == (len(p.prod)-1):
                        # Add elements of follow(a) to follow(b)
                        for f in Follow[p.name]:
                            if f not in Follow[B]:
        if not didadd: break

        # Dump the FOLLOW sets to the debug file.
        _vf.write('\nFollow:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
# -------------------------------------------------------------------------
# compute_first1()
#
# Compute the value of FIRST1(X) for all symbols
# -------------------------------------------------------------------------
def compute_first1():
    # NOTE(review): several lines are elided from this listing.
    # Terminals: FIRST(t) = {t}.
    for t in Terminals.keys():

    # '#' is the marker terminal used for default actions (see parse()).
    First['#'] = ['#']

    # Initialize to the empty set:
    for n in Nonterminals.keys():

    # Then propagate symbols until no change:
        for n in Nonterminals.keys():
            for p in Prodnames[n]:
                for f in first(p.prod):
                    if f not in First[n]:
                        First[n].append( f )

        # Dump the FIRST sets to the debug file.
        _vf.write('\nFirst:\n')
        for k in Nonterminals.keys():
            _vf.write("%-20s : %s\n" %
                (k, " ".join([str(s) for s in First[k]])))
# -----------------------------------------------------------------------------
#                           === SLR Generation ===
#
# The following functions are used to construct SLR (Simple LR) parsing tables
# as described on p.221-229 of the dragon book.
# -----------------------------------------------------------------------------

# Global variables for the LR parsing engine
# NOTE(review): the enclosing `def` line is elided from this listing.
    global _lr_action, _lr_goto, _lr_method
    global _lr_goto_cache, _lr0_cidhash

    _lr_action = { }        # Action table
    _lr_goto = { }          # Goto table
    _lr_method = "Unknown"  # LR method used
    _lr_goto_cache = { }

# -----------------------------------------------------------------------------
# Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
# prodlist is a list of productions.
# -----------------------------------------------------------------------------

_add_count = 0      # Counter used to detect cycles

# NOTE(review): the `def lr0_closure(I):` line is elided from this listing.
    prodlist = Productions

    # Add everything in I to J
        # Skip items already visited during this closure pass.
        if x.lr0_added == _add_count: continue
        x.lr0_added = _add_count
# -----------------------------------------------------------------------------
# Compute the LR(0) goto function goto(I,X) where I is a set
# of LR(0) items and X is a grammar symbol.   This function is written
# in a way that guarantees uniqueness of the generated goto sets
# (i.e. the same goto set will never be returned as two different Python
# objects).  With uniqueness, we can later do fast set comparisons using
# id(obj) instead of element-wise comparison.
# -----------------------------------------------------------------------------
# NOTE(review): the `def lr0_goto(I,x):` line and several statements are
# elided from this listing; indentation below is reconstructed.
    # First we look for a previously cached entry
    g = _lr_goto_cache.get((id(I),x),None)

    # Now we generate the goto set in a way that guarantees uniqueness
    s = _lr_goto_cache.get(x,None)
        _lr_goto_cache[x] = s

        # Collect items whose dot sits immediately after symbol x.
        if n and n.lrbefore == x:
            s1 = s.get(id(n),None)
    _lr_goto_cache[(id(I),x)] = g

# Given a setnumber of an lr0 state and a symbol return the setnumber of the goto state
def lr0_goto_setnumber(I_setnumber, x):
    # Memoized: reuse a previously computed goto-state number if available.
    if GotoSetNum.has_key((I_setnumber, x)):
        setnumber = GotoSetNum[(I_setnumber, x)]
        # Otherwise compute goto(I,x) and search Canonical for the matching state.
        gset = lr0_goto(Canonical[I_setnumber], x)
            for i in xrange(len(gset[0].setnumbers)):
                    if not item.setnumbers[i]:
                if inall and len(Canonical[i]) == gsetlen:
                    break # Note: DB. I added this to improve performance.
                          # Not sure if this breaks the algorithm (it doesn't appear to).
        GotoSetNum[(I_setnumber, x)] = setnumber

# Compute the kernel of a set of LR(0) items
        # Kernel items: the augmented start item, items with the dot past
        # position 0, and empty productions.
        if p.name == "S'" or p.lr_index > 0 or p.len == 0:

# -----------------------------------------------------------------------------
# Compute the LR(0) sets of item function
# -----------------------------------------------------------------------------
    C = [ lr0_closure([Productions[0].lr_next]) ]
        _lr0_cidhash[id(I)] = i

    # Loop over the items in C and each grammar symbols
        # Collect all of the symbols that could possibly be in the goto(I,X) sets
            for x in asyms.keys():
                if _lr0_cidhash.has_key(id(g)): continue
                _lr0_cidhash[id(g)] = len(C)
# NOTE(review): elided excerpt of slr_parse_table() — many intervening original
# lines are missing; code kept byte-identical, only comments added. Builds the
# SLR action/goto tables state by state, resolving shift/reduce conflicts by
# precedence (shift is favored by default) and reduce/reduce conflicts by
# earliest-defined rule.
1299 # -----------------------------------------------------------------------------
1302 # This function constructs an SLR table.
1303 # -----------------------------------------------------------------------------
1304 def slr_parse_table():
1306 goto = _lr_goto # Goto array
1307 action = _lr_action # Action array
1308 actionp = { } # Action production array (temporary)
1316 sys.stderr.write("yacc: Generating SLR parsing table...\n")
1317 _vf.write("\n\nParsing method: SLR\n\n")
1319 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
1320 # This determines the number of states
1324 # Build the parser table, state by state
1327 # Loop over each production in I
1328 actlist = [ ] # List of actions
1331 _vf.write("\nstate %d\n\n" % st)
1333 _vf.write(" (%d) %s\n" % (p.number, str(p)))
1338 if p.prod[-1] == ".":
1340 # Start symbol. Accept!
# Reduce actions: in SLR the lookahead set is Follow(lhs) of the production.
1344 # We are at the end of a production. Reduce!
1345 for a in Follow[p.name]:
1346 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
1347 r = action.get((st,a),None)
1349 # Whoa. Have a shift/reduce or reduce/reduce conflict
1351 # Need to decide on shift or reduce here
1352 # By default we favor shifting. Need to add
1353 # some precedence rules here.
1354 sprec,slevel = Productions[actionp[st,a].number].prec
1355 rprec,rlevel = Precedence.get(a,('right',0))
1356 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
1357 # We really need to reduce here.
1358 action[st,a] = -p.number
1360 if not slevel and not rlevel:
1361 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1362 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1364 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1367 # Hmmm. Guess we'll keep the shift
1368 if not slevel and not rlevel:
1369 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1370 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1373 # Reduce/reduce conflict. In this case, we favor the rule
1374 # that was defined first in the grammar file
1375 oldp = Productions[-r]
1376 pp = Productions[p.number]
1377 if oldp.line > pp.line:
1378 action[st,a] = -p.number
1380 # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
1382 _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
1383 _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
1385 sys.stderr.write("Unknown conflict in state %d\n" % st)
1387 action[st,a] = -p.number
# Shift actions: dot is not at the end; shift on the terminal after the dot.
# Encoding convention (visible above/below): action > 0 = shift to that state,
# action < 0 = reduce by rule -action.
1391 a = p.prod[i+1] # Get symbol right after the "."
1392 if Terminals.has_key(a):
1394 j = _lr0_cidhash.get(id(g),-1)
1396 # We are in a shift state
1397 actlist.append((a,p,"shift and go to state %d" % j))
1398 r = action.get((st,a),None)
1400 # Whoa have a shift/reduce or shift/shift conflict
1403 sys.stderr.write("Shift/shift conflict in state %d\n" % st)
1405 # Do a precedence check.
1406 # - if precedence of reduce rule is higher, we reduce.
1407 # - if precedence of reduce is same and left assoc, we reduce.
1408 # - otherwise we shift
1409 rprec,rlevel = Productions[actionp[st,a].number].prec
1410 sprec,slevel = Precedence.get(a,('right',0))
1411 if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
1412 # We decide to shift here... highest precedence to shift
1415 if not slevel and not rlevel:
1417 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1418 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1419 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1422 # Hmmm. Guess we'll keep the reduce
1423 if not slevel and not rlevel:
1425 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1426 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1429 sys.stderr.write("Unknown conflict in state %d\n" % st)
1434 except StandardError,e:
1435 raise YaccError, "Hosed in slr_parse_table", e
1437 # Print the actions associated with each terminal
1440 for a,p,m in actlist:
1441 if action.has_key((st,a)):
1442 if p is actionp[st,a]:
1443 _vf.write(" %-15s %s\n" % (a,m))
1444 _actprint[(a,m)] = 1
1446 for a,p,m in actlist:
1447 if action.has_key((st,a)):
1448 if p is not actionp[st,a]:
1449 if not _actprint.has_key((a,m)):
1450 _vf.write(" ! %-15s [ %s ]\n" % (a,m))
1451 _actprint[(a,m)] = 1
1453 # Construct the goto table for this state
1459 if Nonterminals.has_key(s):
1461 for n in nkeys.keys():
1463 j = _lr0_cidhash.get(id(g),-1)
1467 _vf.write(" %-30s shift and go to state %d\n" % (n,j))
# Final conflict-count summary on stderr (singular vs. plural message forms).
1472 if n_srconflict == 1:
1473 sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
1474 if n_srconflict > 1:
1475 sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
1476 if n_rrconflict == 1:
1477 sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
1478 if n_rrconflict > 1:
1479 sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)
1483 # -----------------------------------------------------------------------------
1484 # ==== LALR(1) Parsing ====
1485 # FINISHED! 5/20/2003 by Elias Ioup
1486 # -----------------------------------------------------------------------------
# NOTE(review): elided excerpt of lr1_closure() — many intervening original
# lines are missing; code kept byte-identical, comments only.
1489 # Compute the lr1_closure of a set I. I is a list of productions and setnumber
1490 # is the state that the LR items created from it should belong to.
1494 def lr1_closure(I, setnumber = 0):
1499 prodlist = Productions
1501 # Add everything in I to J
1512 jlr_index = j.lr_index
1513 jprodslice = jprod[jlr_index+2:]
# Only expand when the symbol right after the dot is a nonterminal.
1515 if jlr_index < len(jprod) - 1 and Nonterminals.has_key(jprod[jlr_index+1]):
# lk_added[setnumber] tracks how many lookaheads have already been processed
# for this state, so only newly-added lookaheads are expanded below.
1518 if j.lk_added.setdefault(setnumber, 0) < len(j.lookaheads[setnumber]):
1519 for a in j.lookaheads[setnumber][j.lk_added[setnumber]:]:
1520 # find b in FIRST(Xa) if j = [A->a.BX,a]
1521 temp_first_syms = first(jprodslice + (a,))
1522 for x in temp_first_syms:
1523 if x not in first_syms:
1524 first_syms.append(x)
1526 j.lk_added[setnumber] = len(j.lookaheads[setnumber])
1531 if x.lr_next.lookaheads.has_key(setnumber):
1532 _xlook = x.lr_next.lookaheads[setnumber]
1533 for s in first_syms:
1538 x.lr_next.lookaheads[setnumber] = first_syms
1542 if not Jhash.has_key(nid):
# NOTE(review): elided excerpt of add_lookaheads() — many intervening original
# lines are missing; code kept byte-identical, comments only. Implements the
# classic LALR(1) lookahead computation over the kernel sets K: the dummy
# lookahead '#' distinguishes propagated lookaheads from spontaneous ones.
1548 def add_lookaheads(K):
1552 for setnumber in range(len(K)):
1553 for kitem in K[setnumber]:
# Seed each kernel item with the dummy lookahead '#' and take its LR(1) closure.
1554 kitem.lookaheads[setnumber] = ['#']
1555 J = lr1_closure([kitem], setnumber)
1557 # find the lookaheads that are spontaneously created from closures
1558 # and the propagations of lookaheads between lr items
1560 if item.lr_index < len(item.prod)-1:
1561 for lookahead in item.lookaheads[setnumber]:
1562 goto_setnumber = lr0_goto_setnumber(setnumber, item.prod[item.lr_index+1])
# Non-'#' lookaheads are spontaneous; '#' marks a propagation link.
1564 if lookahead != '#':
1565 if item.lr_next in K[goto_setnumber]:
1568 spontaneous.append((next, (lookahead, goto_setnumber)))
1570 if goto_setnumber > -1:
1571 if item.lr_next in K[goto_setnumber]:
1575 propogate.append(((kitem, setnumber), (next, goto_setnumber)))
1579 for x in K[setnumber]:
1580 x.lookaheads[setnumber] = []
# Install the spontaneous lookaheads; x is (item, (lookahead, setnumber)).
1582 for x in spontaneous:
1583 if x[1][0] not in x[0].lookaheads[x[1][1]]:
1584 x[0].lookaheads[x[1][1]].append(x[1][0])
# The start item of state 0 always has end-of-input '$' as its lookahead.
1586 K[0][0].lookaheads[0] = ['$']
1590 if pitems.has_key(x[0]):
1591 pitems[x[0]].append(x[1])
1594 pitems[x[0]].append(x[1])
1596 # propagate the lookaheads that were spontaneously generated
1597 # based on the propagations produced above
1605 pkey = (item, kindex)
1606 if pitems.has_key(pkey):
1607 for propogation in pitems[pkey]:
1608 gitem = propogation[0]
1609 gsetnumber = propogation[1]
1610 glookaheads = gitem.lookaheads[gsetnumber]
1611 for lookahead in item.lookaheads[kindex]:
1612 if lookahead not in glookaheads:
1613 glookaheads.append(lookahead)
# NOTE(review): elided excerpt — code kept byte-identical, comments only.
# Precomputes, for every nonterminal, its terminal reductions (TReductions) and
# nonterminal reductions (NTReductions) into module-level tables used later by
# lalr_parse_table().
1617 def ReduceNonterminals():
1623 for nt in Nonterminals.keys():
1624 TReductions[nt] = []
1625 NTReductions[nt] = []
1627 for nt in Nonterminals.keys():
1628 terms = ReduceToTerminals(nt)
1629 TReductions[nt].extend(terms)
1630 if not NTReductions.has_key(nt):
1631 ReduceToNonterminals(nt)
# NOTE(review): elided excerpt — code kept byte-identical, comments only.
# Returns the list of terminals that nonterminal `nt` can reduce to via the
# leading symbol of its productions; `cyclemap` guards against infinite
# recursion through cyclic grammars.
1635 def ReduceToTerminals(nt,cyclemap=None):
1638 reducedterminals = []
1639 if not cyclemap: cyclemap = {}
# Already visited on this path: cycle detected, contribute nothing.
1641 if cyclemap.has_key(nt): return []
1644 for p in Prodnames[nt]:
1646 if Terminals.has_key(p.prod[0]):
1647 if p.prod[0] not in reducedterminals:
1648 reducedterminals.append(p.prod[0])
# Leading symbol is a nonterminal: recurse, sharing the cycle map.
1651 terms = ReduceToTerminals(p.prod[0],cyclemap)
1653 if t not in reducedterminals:
1654 reducedterminals.append(t)
1656 return reducedterminals
# NOTE(review): elided excerpt — code kept byte-identical, comments only.
# Fills NTReductions[nt] with the nonterminals reachable as the leading symbol
# of nt's productions (recursing and memoizing through NTReductions).
1659 def ReduceToNonterminals(nt):
1663 reducednonterminals = []
1665 for p in Prodnames[nt]:
1667 if Nonterminals.has_key(p.prod[0]):
1668 if p.prod[0] not in reducednonterminals:
1669 reducednonterminals.append(p.prod[0])
# Compute the leading nonterminal's own reductions first (memoized).
1671 if not NTReductions.has_key(p.prod[0]):
1672 ReduceToNonterminals(p.prod[0])
1674 nterms = NTReductions[p.prod[0]]
1676 if nt not in reducednonterminals:
1677 reducednonterminals.append(nt)
1680 NTReductions[nt] = reducednonterminals
# NOTE(review): elided excerpt of lalr_parse_table() — many intervening original
# lines are missing; code kept byte-identical, comments only. Mirrors
# slr_parse_table() but uses per-state LALR(1) lookaheads (p.lookaheads[st])
# instead of Follow sets, plus special handling for empty productions via
# EmptyAncestors.
1682 # -----------------------------------------------------------------------------
1683 # lalr_parse_table()
1685 # This function constructs an LALR table.
1686 # -----------------------------------------------------------------------------
1687 def lalr_parse_table():
1689 goto = _lr_goto # Goto array
1690 action = _lr_action # Action array
1691 actionp = { } # Action production array (temporary)
1692 goto_cache = _lr_goto_cache
1693 cid_hash = _lr0_cidhash
1701 sys.stderr.write("yacc: Generating LALR(1) parsing table...\n")
1702 _vf.write("\n\nParsing method: LALR(1)\n\n")
1704 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
1705 # This determines the number of states
1713 # Create the kernel states.
1718 K.append(lr0_kernel(x))
1720 y.setnumbers = setC[:]
1725 y.lookaheads[_cindex] = []
1726 y.setnumbers[_cindex] = 1
1727 _cindex = _cindex + 1
1730 # Add lookaheads to the lr items
1736 # Do the reductions for parsing first and keep them in globals
1739 ReduceNonterminals()
# EmptyAncestors[y] lists the nonterminals whose reductions can reach the
# empty-producing nonterminal y.
1746 for y in Prodempty.keys():
1747 EmptyAncestors[y] = []
1748 for x in NTReductions.items():
1750 if Prodempty.has_key(y):
1751 EmptyAncestors[y].append(x[0])
1754 # Build the parser table, state by state
1757 # Loop over each production in I
1758 actlist = [ ] # List of actions
1764 _vf.write("\nstate %d\n\n" % st)
1766 _vf.write(" (%d) %s\n" % (p.number, str(p)))
1772 if p.prod[-1] == ".":
1774 # Start symbol. Accept!
# Empty production: derive the usable lookaheads from the ancestors' contexts.
1777 elif len(p.prod) == 0:
1778 ancestors = EmptyAncestors[p.name]
1783 plist = Productions[i.name]
1785 if len(x.prod) > 0 and x.prod[0] == p.name:
1787 d = x.prod[lr_index+2:]
1788 for l in x.lookaheads.items():
1789 flist = First[tuple(n+d+[l])]
1791 if f not in input_list and f in p.lookaheads[st]:
1792 input_list.append(f)
1794 # We are at the end of a production. Reduce!
1795 #print "input_list: %s" % input_list
1796 #print "Follow[p.name]: %s" % Follow[p.name]
1797 for a in input_list:
1798 actlist.append((a,p,"reduce using rule %d (%s) " % (p.number,p)))
1799 r = action.get((st,a),None)
1801 # Whoa. Have a shift/reduce or reduce/reduce conflict
1803 # Need to decide on shift or reduce here
1804 # By default we favor shifting. Need to add
1805 # some precedence rules here.
1806 sprec,slevel = Productions[actionp[st,a].number].prec
1807 rprec,rlevel = Precedence.get(a,('right',0))
1808 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
1809 # We really need to reduce here.
1810 action[st,a] = -p.number
1812 if not slevel and not rlevel:
1813 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1814 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1816 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1819 # Hmmm. Guess we'll keep the shift
1820 if not slevel and not rlevel:
1821 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1822 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1825 # Reduce/reduce conflict. In this case, we favor the rule
1826 # that was defined first in the grammar file
1827 oldp = Productions[-r]
1828 pp = Productions[p.number]
1829 if oldp.line > pp.line:
1830 action[st,a] = -p.number
1832 # print "Reduce/reduce conflict in state %d" % st
1834 _vfc.write("reduce/reduce conflict in state %d resolved using rule %d.\n" % (st, actionp[st,a].number))
1835 _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d.\n" % (a,actionp[st,a].number))
1837 sys.stderr.write("Unknown conflict in state %d\n" % st)
1839 action[st,a] = -p.number
1842 break # break out of the for s in K loop because we only want to make
1843 # sure that a production is in the Kernel
# Ordinary (non-empty) completed production: reduce on the LALR(1) lookaheads
# computed for this state.
1846 # We are at the end of a production. Reduce!
1848 for a in p.lookaheads[st]:
1849 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
1850 r = action.get((st,a),None)
1852 # Whoa. Have a shift/reduce or reduce/reduce conflict
1854 # Need to decide on shift or reduce here
1855 # By default we favor shifting. Need to add
1856 # some precedence rules here.
1857 sprec,slevel = Productions[actionp[st,a].number].prec
1858 rprec,rlevel = Precedence.get(a,('right',0))
1859 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
1860 # We really need to reduce here.
1861 action[st,a] = -p.number
1863 if not slevel and not rlevel:
1864 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1865 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1867 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1870 # Hmmm. Guess we'll keep the shift
1871 if not slevel and not rlevel:
1872 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1873 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1876 # Reduce/reduce conflict. In this case, we favor the rule
1877 # that was defined first in the grammar file
1878 oldp = Productions[-r]
1879 pp = Productions[p.number]
1880 if oldp.line > pp.line:
1881 action[st,a] = -p.number
1883 # print "Reduce/reduce conflict in state %d" % st
1885 _vfc.write("reduce/reduce conflict in state %d resolved using rule %d.\n" % (st, actionp[st,a].number))
1886 _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d.\n" % (a,actionp[st,a].number))
1888 print "Unknown conflict in state %d" % st
1890 action[st,a] = -p.number
# Shift actions on the terminal immediately following the dot.
1894 a = p.prod[i+1] # Get symbol right after the "."
1895 if Terminals.has_key(a):
1896 g = goto_cache[(idI,a)]
1897 j = cid_hash.get(id(g),-1)
1899 # We are in a shift state
1901 if not acthash.has_key(_k):
1902 actlist.append((a,p,"shift and go to state %d" % j))
1904 r = action.get((st,a),None)
1906 # Whoa have a shift/reduce or shift/shift conflict
1909 sys.stderr.write("Shift/shift conflict in state %d\n" % st)
1911 # Do a precedence check.
1912 # - if precedence of reduce rule is higher, we reduce.
1913 # - if precedence of reduce is same and left assoc, we reduce.
1914 # - otherwise we shift
1915 rprec,rlevel = Productions[actionp[st,a].number].prec
1916 sprec,slevel = Precedence.get(a,('right',0))
1917 if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
1918 # We decide to shift here... highest precedence to shift
1921 if not slevel and not rlevel:
1923 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1924 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1925 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1928 # Hmmm. Guess we'll keep the reduce
1929 if not slevel and not rlevel:
1931 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1932 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1935 sys.stderr.write("Unknown conflict in state %d\n" % st)
# Additional shifts via the precomputed terminal reductions of a nonterminal.
1941 term_list = TReductions[nonterminal]
1942 # DB: This loop gets executed a lot. Try to optimize
1944 g = goto_cache[(idI,a)]
1947 # We are in a shift state
1948 # Don't put repeated shift actions on action list (performance hack)
1950 if not acthash.has_key(_k):
1951 actlist.append((a,p,"shift and go to state "+str(j)))
1954 r = action.get((st,a),None)
1956 # Whoa have a shift/reduce or shift/shift conflict
1959 sys.stderr.write("Shift/shift conflict in state %d\n" % st)
1962 # Do a precedence check.
1963 # - if precedence of reduce rule is higher, we reduce.
1964 # - if precedence of reduce is same and left assoc, we reduce.
1965 # - otherwise we shift
1966 rprec,rlevel = Productions[actionp[st,a].number].prec
1967 sprec,slevel = Precedence.get(a,('right',0))
1968 if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
1969 # We decide to shift here... highest precedence to shift
1972 if not slevel and not rlevel:
1974 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1975 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1976 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1979 # Hmmm. Guess we'll keep the reduce
1980 if not slevel and not rlevel:
1982 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1983 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1986 sys.stderr.write("Unknown conflict in state %d\n" % st)
1991 except StandardError,e:
1992 raise YaccError, "Hosed in lalr_parse_table", e
1994 # Print the actions associated with each terminal
1996 for a,p,m in actlist:
1997 if action.has_key((st,a)):
1998 if p is actionp[st,a]:
1999 _vf.write(" %-15s %s\n" % (a,m))
2002 for a,p,m in actlist:
2003 if action.has_key((st,a)):
2004 if p is not actionp[st,a]:
2005 _vf.write(" ! %-15s [ %s ]\n" % (a,m))
2007 # Construct the goto table for this state
2011 if Nonterminals.has_key(s):
2014 # Construct the goto table for this state
2015 for n in nkeys.keys():
2017 j = cid_hash.get(id(g),-1)
2021 _vf.write(" %-30s shift and go to state %d\n" % (n,j))
# Final conflict-count summary on stderr (singular vs. plural message forms).
2025 if n_srconflict == 1:
2026 sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
2027 if n_srconflict > 1:
2028 sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
2029 if n_rrconflict == 1:
2030 sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
2031 if n_rrconflict > 1:
2032 sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)
2035 # -----------------------------------------------------------------------------
2036 # ==== LR Utility functions ====
2037 # -----------------------------------------------------------------------------
# NOTE(review): elided excerpt of lr_write_tables() — many intervening original
# lines are missing; code kept byte-identical, comments only. Serializes the
# generated action/goto/production tables into an importable Python module
# (the "parsetab" file), in a compact factored form or a flat form.
2039 # -----------------------------------------------------------------------------
2040 # _lr_write_tables()
2042 # This function writes the LR parsing tables to a file
2043 # -----------------------------------------------------------------------------
2045 def lr_write_tables(modulename=tab_module,outputdir=''):
2046 filename = os.path.join(outputdir,modulename) + ".py"
2048 f = open(filename,"w")
2052 # This file is automatically generated. Do not edit.
2057 """ % (filename, repr(_lr_method), repr(Signature.digest())))
2059 # Change smaller to 0 to go back to original tables
# Compact form: group action entries per terminal to shrink the output file,
# and emit code that rebuilds the flat _lr_action dict at import time.
2062 # Factor out names to try and make smaller
2066 for k,v in _lr_action.items():
2074 f.write("\n_lr_action_items = {")
2075 for k,v in items.items():
2076 f.write("%r:([" % k)
2088 for _k, _v in _lr_action_items.items():
2089 for _x,_y in zip(_v[0],_v[1]):
2090 _lr_action[(_x,_k)] = _y
2091 del _lr_action_items
# Flat (original) form: write every (state, terminal) entry directly.
2095 f.write("\n_lr_action = { ");
2096 for k,v in _lr_action.items():
2097 f.write("(%r,%r):%r," % (k[0],k[1],v))
# Same two strategies for the goto table.
2101 # Factor out names to try and make smaller
2104 for k,v in _lr_goto.items():
2112 f.write("\n_lr_goto_items = {")
2113 for k,v in items.items():
2114 f.write("%r:([" % k)
2126 for _k, _v in _lr_goto_items.items():
2127 for _x,_y in zip(_v[0],_v[1]):
2128 _lr_goto[(_x,_k)] = _y
2132 f.write("\n_lr_goto = { ");
2133 for k,v in _lr_goto.items():
2134 f.write("(%r,%r):%r," % (k[0],k[1],v))
2137 # Write production table
2138 f.write("_lr_productions = [\n")
2139 for p in Productions:
2142 f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
2144 f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len))
2151 print "Unable to create '%s'" % filename
# NOTE(review): elided excerpt — code kept byte-identical, comments only.
# Attempts to import a previously generated parsetab module; accepts it when
# running optimized or when its stored signature matches the current grammar.
2155 def lr_read_tables(module=tab_module,optimize=0):
2156 global _lr_action, _lr_goto, _lr_productions, _lr_method
2158 exec "import %s as parsetab" % module
2160 if (optimize) or (Signature.digest() == parsetab._lr_signature):
2161 _lr_action = parsetab._lr_action
2162 _lr_goto = parsetab._lr_goto
2163 _lr_productions = parsetab._lr_productions
2164 _lr_method = parsetab._lr_method
# Missing or stale table module falls through here (caller regenerates).
2169 except (ImportError,AttributeError):
# NOTE(review): elided excerpt (the enclosing try: line is missing from this
# view); code kept byte-identical, comments only.
2173 # Available instance types. This is used when parsers are defined by a class.
2174 # It's a little funky because I want to preserve backwards compatibility
2175 # with Python 2.0 where types.ObjectType is undefined.
2178 _INSTANCETYPE = (types.InstanceType, types.ObjectType)
2179 except AttributeError:
2180 _INSTANCETYPE = types.InstanceType
# NOTE(review): elided excerpt of yacc() — many intervening original lines are
# missing; code kept byte-identical, comments only. Main entry point: collects
# grammar rules from a module/instance/caller globals, validates them, builds
# (or reloads) the parse tables, and installs a Parser object.
2182 # -----------------------------------------------------------------------------
2185 # Build the parser module
2186 # -----------------------------------------------------------------------------
2188 def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0,write_tables=1,debugfile=debug_file,outputdir=''):
2196 # Add starting symbol to signature
2198 Signature.update(start)
2200 # Add parsing method to signature
2201 Signature.update(method)
2203 # If a "module" parameter was supplied, extract its dictionary.
2204 # Note: a module may in fact be an instance as well.
2207 # User supplied a module object.
2208 if isinstance(module, types.ModuleType):
2209 ldict = module.__dict__
2210 elif isinstance(module, _INSTANCETYPE):
2211 _items = [(k,getattr(module,k)) for k in dir(module)]
2216 raise ValueError,"Expected a module"
2219 # No module given. We might be able to get information from the caller.
2220 # Throw an exception and unwind the traceback to get the globals
2224 except RuntimeError:
2225 e,b,t = sys.exc_info()
2227 f = f.f_back # Walk out to our calling function
2228 ldict = f.f_globals # Grab its globals dictionary
2230 # If running in optimized mode. We're going to
2232 if (optimize and lr_read_tables(tabmodule,1)):
# Optimized path: rebuild lightweight MiniProduction objects straight from the
# cached table module instead of re-analyzing the grammar.
2235 for p in _lr_productions:
2237 Productions.append(None)
2239 m = MiniProduction()
2245 m.func = ldict[p[2]]
2246 Productions.append(m)
2249 # Get the tokens map
2250 if (module and isinstance(module,_INSTANCETYPE)):
2251 tokens = getattr(module,"tokens",None)
2253 tokens = ldict.get("tokens",None)
2256 raise YaccError,"module does not define a list 'tokens'"
2257 if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
2258 raise YaccError,"tokens must be a list or tuple."
2260 # Check to see if a requires dictionary is defined.
2261 requires = ldict.get("require",None)
2263 if not (isinstance(requires,types.DictType)):
2264 raise YaccError,"require must be a dictionary."
2266 for r,v in requires.items():
2268 if not (isinstance(v,types.ListType)):
2270 v1 = [x.split(".") for x in v]
2272 except StandardError:
2273 print "Invalid specification for rule '%s' in require. Expected a list of strings" % r
2276 # Build the dictionary of terminals. We record a 0 in the
2277 # dictionary to track whether or not a terminal is actually
2278 # used in the grammar
2280 if 'error' in tokens:
2281 print "yacc: Illegal token 'error'. Is a reserved word."
2282 raise YaccError,"Illegal token name"
2285 if Terminals.has_key(n):
2286 print "yacc: Warning. Token '%s' multiply defined." % n
2289 Terminals['error'] = [ ]
2291 # Get the precedence map (if any)
2292 prec = ldict.get("precedence",None)
2294 if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
2295 raise YaccError,"precedence must be a list or tuple."
2296 add_precedence(prec)
2297 Signature.update(repr(prec))
2300 if not Precedence.has_key(n):
2301 Precedence[n] = ('right',0) # Default, right associative, 0 precedence
2303 # Look for error handler
2304 ef = ldict.get('p_error',None)
2306 if isinstance(ef,types.FunctionType):
2308 elif isinstance(ef, types.MethodType):
2311 raise YaccError,"'p_error' defined, but is not a function or method."
2312 eline = ef.func_code.co_firstlineno
2313 efile = ef.func_code.co_filename
2316 if (ef.func_code.co_argcount != 1+ismethod):
2317 raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
2321 print "yacc: Warning. no p_error() function is defined."
2323 # Get the list of built-in functions with p_ prefix
2324 symbols = [ldict[f] for f in ldict.keys()
2325 if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_'
2326 and ldict[f].__name__ != 'p_error')]
2328 # Check for non-empty symbols
2329 if len(symbols) == 0:
2330 raise YaccError,"no rules of the form p_rulename are defined."
2332 # Sort the symbols by line number
2333 symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
2335 # Add all of the symbols to the grammar
2337 if (add_function(f)) < 0:
2340 files[f.func_code.co_filename] = None
2342 # Make a signature of the docstrings
2345 Signature.update(f.__doc__)
2350 raise YaccError,"Unable to construct parser."
# Signature mismatch (or no cached table): run full grammar analysis and
# regenerate the tables.
2352 if not lr_read_tables(tabmodule):
2355 for filename in files.keys():
2356 if not validate_file(filename):
2359 # Validate dictionary
2360 validate_dict(ldict)
2362 if start and not Prodnames.has_key(start):
2363 raise YaccError,"Bad starting symbol '%s'" % start
2365 augment_grammar(start)
2366 error = verify_productions(cycle_check=check_recursion)
2367 otherfunc = [ldict[f] for f in ldict.keys()
2368 if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')]
2371 raise YaccError,"Unable to construct parser."
2375 compute_follow(start)
2379 elif method == 'LALR':
2382 raise YaccError, "Unknown parsing method '%s'" % method
2385 lr_write_tables(tabmodule,outputdir)
2389 f = open(os.path.join(outputdir,debugfile),"w")
2390 f.write(_vfc.getvalue())
2392 f.write(_vf.getvalue())
2395 print "yacc: can't create '%s'" % debugfile,e
2397 # Made it here. Create a parser object and set up its internal state.
2398 # Set global parse() method to bound method of parser object.
2401 p.productions = Productions
2402 p.errorfunc = Errorfunc
2403 p.action = _lr_action
2405 p.method = _lr_method
2406 p.require = Requires
2411 # Clean up all of the globals we created
# NOTE(review): elided excerpt of yacc_cleanup() — the def line is missing from
# this view; code kept byte-identical, comments only. Releases the module-level
# state used during table construction so the memory can be reclaimed.
2416 # yacc_cleanup function. Delete all of the global variables
2417 # used during table construction
2420 global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
2421 del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
2423 global Productions, Prodnames, Prodmap, Terminals
2424 global Nonterminals, First, Follow, Precedence, LRitems
2425 global Errorfunc, Signature, Requires
2426 global Prodempty, TReductions, NTReductions, GotoSetNum, Canonical
2428 del Productions, Prodnames, Prodmap, Terminals
2429 del Nonterminals, First, Follow, Precedence, LRitems
2430 del Errorfunc, Signature, Requires
2431 del Prodempty, TReductions, NTReductions, GotoSetNum, Canonical
# Placeholder installed at module level: calling parse() before yacc() has
# built a parser is a usage error, reported as a YaccError.
def parse(*args, **kwargs):
    raise YaccError("yacc: No parser built with yacc()")