1 #-----------------------------------------------------------------------------
4 # Author(s): David M. Beazley (dave@dabeaz.com)
6 # Copyright (C) 2001-2005, David M. Beazley
8 # $Header: /cvs/projects/PLY/yacc.py,v 1.6 2004/05/26 20:51:34 beazley Exp $
10 # This library is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU Lesser General Public
12 # License as published by the Free Software Foundation; either
13 # version 2.1 of the License, or (at your option) any later version.
15 # This library is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 # Lesser General Public License for more details.
20 # You should have received a copy of the GNU Lesser General Public
21 # License along with this library; if not, write to the Free Software
22 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 # See the file COPYING for a complete copy of the LGPL.
27 # This implements an LR parser that is constructed from grammar rules defined
28 # as Python functions. Roughly speaking, this module is a cross between
29 # John Aycock's Spark system and the GNU bison utility.
31 # The current implementation is only somewhat object-oriented. The
32 # LR parser itself is defined in terms of an object (which allows multiple
33 # parsers to co-exist). However, most of the variables used during table
34 # construction are defined in terms of global variables. Users shouldn't
35 # notice unless they are trying to define multiple parsers at the same
36 # time using threads (in which case they should have their head examined).
38 # This implementation supports both SLR and LALR(1) parsing. LALR(1)
39 # support was implemented by Elias Ioup (ezioup@alumni.uchicago.edu)
40 # and hacked abit by Dave to run faster.
42 # :::::::: WARNING :::::::
44 # Construction of LR parsing tables is fairly complicated and expensive.
45 # To make this module run fast, a *LOT* of work has been put into
46 # optimization---often at the expensive of readability and what might
47 # consider to be good Python "coding style." Modify the code at your
49 # ----------------------------------------------------------------------------
53 #-----------------------------------------------------------------------------
54 # === User configurable parameters ===
56 # Change these to modify the default behavior of yacc (if you wish)
57 #-----------------------------------------------------------------------------
# Module-wide defaults.  These may be overridden per-call via arguments to
# the yacc() entry point (not shown in this excerpt).
yaccdebug = 1                # Debugging mode.  If set, yacc generates
                             # a 'parser.out' file in the current directory
debug_file = 'parser.out'    # Default name of the debugging file
tab_module = 'parsetab'      # Default name of the table module
default_lr = 'SLR'           # Default LR table generation method ('SLR' or 'LALR')
error_count = 3              # Number of symbols that must be shifted to leave recovery mode
68 import re, types, sys, cStringIO, md5, os.path
70 # Exception raised for yacc-related errors
71 class YaccError(Exception): pass
73 #-----------------------------------------------------------------------------
74 # === LR Parsing Engine ===
76 # The following classes are used for the LR parser itself. These are not
77 # used during table construction and are independent of the actual LR
78 # table generation algorithm
79 #-----------------------------------------------------------------------------
81 # This class is used to hold non-terminal grammar symbols during parsing.
82 # It normally has the following attributes set:
83 # .type = Grammar symbol type
84 # .value = Symbol value
85 # .lineno = Starting line number
86 # .endlineno = Ending line number (optional, set automatically)
    def __str__(self):
        # A grammar symbol prints as its type name (e.g. 'NUMBER', 'expr').
        return self.type
    def __repr__(self):
        # Same representation as str(); handy when dumping parser stacks.
        return str(self)
92 # This class is a wrapper around the objects actually passed to each
93 # grammar rule. Index lookup and assignment actually assign the
94 # .value attribute of the underlying YaccSymbol object.
95 # The lineno() method returns the line number of a given
96 # item (or 0 if not defined). The linespan() method returns
97 # a tuple of (startline,endline) representing the range of lines
class YaccProduction:
    # Wrapper around the symbols matched by a grammar rule.  Index access
    # reads/writes the .value attribute of the underlying YaccSymbol objects.
    # NOTE(review): this listing is an elided excerpt -- several statements
    # and two 'def' headers are missing below; the visible lines are kept
    # verbatim and annotated.
    def __init__(self,s):
    # NOTE(review): __init__ body elided (presumably stores s as self.slice
    # and initializes self.pbstack -- confirm against the full source).
    def __getitem__(self,n):
        # p[n] -> value of the n-th grammar symbol.
        return self.slice[n].value
    def __setitem__(self,n,v):
        # p[n] = v stores back into the underlying symbol.
        self.slice[n].value = v
        # NOTE(review): the 'def __len__(self):' header for the next line is
        # elided from this excerpt.
        return len(self.slice)
        # NOTE(review): the 'def lineno(self,n):' header for the next line is
        # elided from this excerpt.
        return getattr(self.slice[n],"lineno",0)
    def linespan(self,n):
        # Return (startline, endline) for symbol n; 0 when no line info.
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline
    def pushback(self,n):
        # Push the last n matched symbols back onto the lookahead stack.
        # NOTE(review): the guard 'if n <= 0:' and the 'for i in ...' loop
        # binding 'i' are elided from this excerpt.  Python 2 raise syntax.
        raise ValueError, "Expected a positive value"
        if n > (len(self.slice)-1):
            raise ValueError, "Can't push %d tokens. Only %d are available." % (n,len(self.slice)-1)
        self.pbstack.append(self.slice[-i-1])
130 # The LR Parsing engine. This is defined as a class so that multiple parsers
131 # can exist in the same process. A user never instantiates this directly.
132 # Instead, the global yacc() function should be used to create a suitable Parser
    def __init__(self,magic=None):
        # This is a hack to keep users from trying to instantiate a Parser
        # directly; yacc() passes the expected magic value.
        # NOTE(review): the 'if magic != ...' guard is elided from this
        # excerpt.  Python 2 raise syntax.
            raise YaccError, "Can't instantiate Parser. Use yacc() instead."
        # Reset internal state
        self.productions = None          # List of productions
        self.errorfunc = None            # Error handling function
        self.action = { }                # LR Action table
        self.goto = { }                  # LR goto table
        self.require = { }               # Attribute require table
        self.method = "Unknown LR"       # Table construction method used
        # NOTE(review): the 'def errok(self):' and 'def restart(self):'
        # headers are elided; the next three lines belong to those error
        # recovery helpers, not to __init__.
        del self.statestack[:]
        self.symstack.append(sym)
        self.statestack.append(0)
    def parse(self,input=None,lexer=None,debug=0):
        """Run the LR parsing engine and return the start symbol's value.

        NOTE(review): this listing is an elided excerpt -- many statements
        (loop headers, conditionals, try blocks) are missing, so the visible
        lines are not contiguous control flow.  They are kept verbatim.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table
        goto = self.goto                 # Local reference to goto table
        prod = self.productions          # Local reference to production list
        pslice = YaccProduction(None)    # Production object passed to grammar rules
        pslice.parser = self             # Parser object
        self.errorcount = 0              # Used during error recovery
        # If no lexer was given, we will try to use the lex module
        # If input was supplied, pass to lexer
        # Hoist the bound method to a local for speed in the token loop.
        get_token = lexer.token
        statestack = [ ]                 # Stack of parsing states
        self.statestack = statestack
        symstack = [ ]                   # Stack of grammar symbols
        self.symstack = symstack
        errtoken = None                  # Err token
        # The start state is assumed to be (0,$)
        # Get the next symbol on the input. If a lookahead symbol
        # is already set, we just use that. Otherwise, we'll pull
        # the next token off of the lookaheadstack or from the lexer
        if not lookaheadstack:
            lookahead = get_token()      # Get the next token
            lookahead = lookaheadstack.pop()
        lookahead = YaccSymbol()
        # Debug string: stack contents with a '.' before the lookahead.
        errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()
        # Check the action table
        ltype = lookahead.type
        t = actions.get((s,ltype),None)
        # shift a symbol on the stack
        # Error, end of input
        sys.stderr.write("yacc: Parse error. EOF\n")
        sys.stderr.write("%-60s shift state %s\n" % (errorlead, t))
        symstack.append(lookahead)
        # Decrease error count on successful shift
        if self.errorcount > 0:
        # reduce a symbol on the stack, emit a production
        # Get production function
        sym.type = pname       # Production name
        sys.stderr.write("%-60s reduce %d\n" % (errorlead, -t))
        targ = symstack[-plen-1:]
        # Propagate line-number information from the first/last RHS symbols.
        sym.lineno = targ[1].lineno
        sym.endlineno = getattr(targ[-1],"endlineno",targ[-1].lineno)
        except AttributeError:
        del statestack[-plen:]
        # Call the grammar rule with our special slice object
        # If there was a pushback, put that on the stack
        lookaheadstack.append(lookahead)
        for _t in pslice.pbstack:
            lookaheadstack.append(_t)
        statestack.append(goto[statestack[-1],pname])
        return getattr(n,"value",None)
        # NOTE(review): file.write() takes a single argument -- the next
        # line would raise TypeError if ever reached (the corrected form
        # appears two lines below in the original source).
        sys.stderr.write(errorlead, "\n")
        sys.stderr.write(errorlead + "\n")
        # We have some kind of parsing error here. To handle
        # this, we are going to push the current token onto
        # the tokenstack and replace it with an 'error' token.
        # If there are any synchronization rules, they may
        # In addition to pushing the error token, we call call
        # the user defined p_error() function if this is the
        # first syntax error. This function is only called if
        if not self.errorcount:
            self.errorcount = error_count
        if errtoken.type == '$':
            errtoken = None               # End of file!
        # Expose errok/token/restart as module-level names while the user's
        # p_error() handler runs, then remove them again.
        global errok,token,restart
        errok = self.errok                # Set some special functions available in error recovery
        restart = self.restart
        tok = self.errorfunc(errtoken)
        del errok, token, restart         # Delete special functions
        if not self.errorcount:
        # User must have done some kind of panic
        # mode recovery on their own. The
        # returned token is the next lookahead
        if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
        sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
        sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
        sys.stderr.write("yacc: Parse error in input. EOF\n")
        self.errorcount = error_count
        # case 1: the statestack only has 1 entry on it. If we're in this state, the
        # entire parse has been rolled back and we're completely hosed. The token is
        # discarded and we just keep going.
        if len(statestack) <= 1 and lookahead.type != '$':
            # Nuke the pushback stack
            del lookaheadstack[:]
        # case 2: the statestack has a couple of entries on it, but we're
        # at the end of the file. nuke the top entry and generate an error token
        # Start nuking entries on the stack
        if lookahead.type == '$':
        # Whoa. We're really hosed here. Bail out
        if lookahead.type != 'error':
        if sym.type == 'error':
        # Hmmm. Error is on top of stack, we'll just nuke input
        # symbol and continue
        if hasattr(lookahead,"lineno"):
            t.lineno = lookahead.lineno
        lookaheadstack.append(lookahead)
        # Call an error function here
        # Unreachable under normal operation; Python 2 raise syntax.
        raise RuntimeError, "yacc: internal parser error!!!\n"
375 # -----------------------------------------------------------------------------
376 # === Parser Construction ===
378 # The following functions and variables are used to implement the yacc() function
379 # itself. This is pretty hairy stuff involving lots of error checking,
380 # construction of LR items, kernels, and so forth. Although a lot of
381 # this work is done using global variables, the resulting Parser object
382 # is completely self contained--meaning that it is safe to repeatedly
383 # call yacc() with different grammars in the same application.
384 # -----------------------------------------------------------------------------
386 # -----------------------------------------------------------------------------
389 # This function checks to see if there are duplicated p_rulename() functions
390 # in the parser module file. Without this function, it is really easy for
391 # users to make mistakes by cutting and pasting code fragments (and it's a real
392 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
393 # we just do a little regular expression pattern matching of def statements
394 # to try and detect duplicates.
395 # -----------------------------------------------------------------------------
def validate_file(filename):
    """Scan a parser module's source for duplicated p_* rule functions.

    Returns 1 (valid / can't tell) or reports redefinitions to stderr.
    NOTE(review): elided excerpt -- the file open, line loop, and the
    bindings of 'f', 'counthash', 'name', and 'linen' are missing here.
    """
    base,ext = os.path.splitext(filename)
    if ext != '.py': return 1          # No idea. Assume it's okay.
    lines = f.readlines()
    # Match def p_funcname(
    fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
    prev = counthash.get(name)
    counthash[name] = linen
    sys.stderr.write("%s:%d: Function %s redefined. Previously defined on line %d\n" % (filename,linen,name,prev))
426 # This function looks for functions that might be grammar rules, but which don't have the proper p_suffix.
def validate_dict(d):
    """Warn about names in the caller's namespace that look like grammar
    rules but lack the required p_ prefix.

    NOTE(review): elided excerpt -- the 'try:' matching the visible
    'except StandardError:' is missing here.  Python 2 only:
    func_code / StandardError do not exist in Python 3.
    """
    for n,v in d.items():
        if n[0:2] == 'p_' and type(v) in (types.FunctionType, types.MethodType): continue
        if n[0:2] == 't_': continue
        sys.stderr.write("yacc: Warning. '%s' not defined as a function\n" % n)
        if 1 and isinstance(v,types.FunctionType) and v.func_code.co_argcount == 1:
            doc = v.__doc__.split(" ")
            sys.stderr.write("%s:%d: Warning. Possible grammar rule '%s' defined without p_ prefix.\n" % (v.func_code.co_filename, v.func_code.co_firstlineno,n))
        except StandardError:
442 # -----------------------------------------------------------------------------
443 # === GRAMMAR FUNCTIONS ===
445 # The following global variables and functions are used to store, manipulate,
446 # and verify the grammar rules specified by the user.
447 # -----------------------------------------------------------------------------
449 # Initialize all of the global variables used during grammar construction
def initialize_vars():
    """(Re)initialize all module-level state used during table construction.

    Called once per yacc() invocation so repeated grammar builds in one
    process start from a clean slate.
    """
    global Productions, Prodnames, Prodmap, Terminals
    global Nonterminals, First, Follow, Precedence, LRitems
    global Errorfunc, Signature, Requires
    global Prodempty, TReductions, NTReductions, GotoSetNum, Canonical

    Productions = [None]   # A list of all of the productions.  The first
                           # entry is always reserved for the purpose of
                           # building an augmented grammar
    Prodnames = { }        # A dictionary mapping the names of nonterminals to a list of all
                           # productions of that nonterminal.
    Prodmap = { }          # A dictionary that is only used to detect duplicate
    Terminals = { }        # A dictionary mapping the names of terminal symbols to a
                           # list of the rules where they are used.
    Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                           # of rule numbers where they are used.
    First = { }            # A dictionary of precomputed FIRST(x) symbols
    Follow = { }           # A dictionary of precomputed FOLLOW(x) symbols
    Precedence = { }       # Precedence rules for each terminal. Contains tuples of the
                           # form ('right',level) or ('nonassoc', level) or ('left',level)
    LRitems = [ ]          # A list of all LR items for the grammar.  These are the
                           # productions with the "dot" like E -> E . PLUS E
    Errorfunc = None       # User defined error handler
    Signature = md5.new()  # Digital signature of the grammar rules, precedence
                           # and other information.  Used to determined when a
                           # parsing table needs to be regenerated.
                           # (Python 2 md5 module; hashlib in Python 3.)
    Requires = { }         # Requires list
    # LALR(1) Initialization
    Prodempty = { }        # A dictionary of all productions that have an empty rule
                           # of the form P : <empty>
    TReductions = { }      # A dictionary of precomputer reductions from
                           # nonterminals to terminals
    NTReductions = { }     # A dictionary of precomputed reductions from
                           # nonterminals to nonterminals
    GotoSetNum = { }       # A dictionary that remembers goto sets based on
                           # the state number and symbol
    Canonical = { }        # A list of LR item sets. A LR item set is a list of LR
                           # items that represent the state of the parser
    # File objects used when creating the parser.out debugging file
    # NOTE(review): the 'global _vf, _vfc' declaration is elided from this
    # excerpt -- without it these assignments would be function-local.
    _vf = cStringIO.StringIO()
    _vfc = cStringIO.StringIO()
513 # -----------------------------------------------------------------------------
516 # This class stores the raw information about a single production or grammar rule.
517 # It has a few required attributes:
519 # name - Name of the production (nonterminal)
520 # prod - A list of symbols making up its production
521 # number - Production number.
523 # In addition, a few additional attributes are used to help with debugging or
524 # optimization of table generation.
526 # file - File where production action is defined.
527 # lineno - Line number where action is defined
528 # func - Action function
529 # prec - Precedence level
530 # lr_next - Next LR item. Example, if we are ' E -> E . PLUS E'
531 # then lr_next refers to 'E -> E PLUS . E'
532 # lr_index - LR item index (location of the ".") in the prod list.
533 # lookaheads - LALR lookahead symbols for this item
534 # len - Length of the production (number of symbols on right hand side)
535 # -----------------------------------------------------------------------------
    def __init__(self,**kw):
        # Store every keyword argument as an attribute (name, prod, number,
        # file, line, func, ... as described in the class comment above).
        # NOTE(review): the loop body 'setattr(self,k,v)' is elided here.
        for k,v in kw.items():
        self.lr0_added = 0     # Flag indicating whether or not added to LR0 closure
        self.lr1_added = 0     # Flag indicating whether or not added to LR1
        self.lookaheads = { }
        self.setnumbers = [ ]
        # NOTE(review): the '__str__' def header and its if/else are elided;
        # the next two lines are the two alternative string forms.
        s = "%s -> %s" % (self.name," ".join(self.prod))
        s = "%s -> <empty>" % self.name
    # Compute lr_items from the production
    # NOTE(review): the 'def lr_item(self,n):' header is elided; the lines
    # below build a copy of this production with the "dot" at position n.
        if n > len(self.prod): return None
        p.prod = list(self.prod)
        p.number = self.number
        p.setnumbers = self.setnumbers
        p.prod = tuple(p.prod)
        # Precompute list of productions immediately following
        # (Python 2 'except Type, name' syntax below.)
            p.lrafter = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError),e:
        p.lrbefore = p.prod[n-1]
586 class MiniProduction:
def is_identifier(s):
    """Return 1 if every character of s is alphanumeric or '_', else 0.

    Used to vet grammar rule and symbol names.  Note that the empty
    string is (vacuously) accepted.

    The excerpt as listed was truncated: the per-character check
    referenced 'c' with no enclosing loop and had no success return.
    Restored the character loop and the final 'return 1'.
    """
    for c in s:
        if not (c.isalnum() or c == '_'): return 0
    return 1
595 # -----------------------------------------------------------------------------
598 # Given an action function, this function assembles a production rule.
599 # The production rule is assumed to be found in the function's docstring.
600 # This rule has the general syntax:
602 # name1 ::= production1
607 # name2 ::= production1
610 # -----------------------------------------------------------------------------
def add_production(f,file,line,prodname,syms):
    """Register one grammar rule 'prodname -> syms' for action function f.

    Validates names, detects duplicate rules, records terminal/nonterminal
    usage, and applies %prec precedence overrides.
    NOTE(review): elided excerpt -- the 'return -1' error exits, the
    Production() construction binding 'p', the symbol loop binding
    'i'/'t'/'s'/'m', and several try/else lines are missing here.
    """
    if Terminals.has_key(prodname):
        sys.stderr.write("%s:%d: Illegal rule name '%s'. Already defined as a token.\n" % (file,line,prodname))
    if prodname == 'error':
        sys.stderr.write("%s:%d: Illegal rule name '%s'. error is a reserved word.\n" % (file,line,prodname))
    if not is_identifier(prodname):
        sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname))
    if not is_identifier(s) and s != '%prec':
        sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname))
    # See if the rule is already in the rulemap
    map = "%s -> %s" % (prodname,syms)
    if Prodmap.has_key(map):
        sys.stderr.write("%s:%d: Duplicate rule %s.\n" % (file,line, m))
        sys.stderr.write("%s:%d: Previous definition at %s:%d\n" % (file,line, m.file, m.line))
    p.number = len(Productions)
    Productions.append(p)
    if not Nonterminals.has_key(prodname):
        Nonterminals[prodname] = [ ]
    # Add all terminals to Terminals
    while i < len(p.prod):
        # %prec handling: the following symbol names a declared precedence.
        precname = p.prod[i+1]
        sys.stderr.write("%s:%d: Syntax error. Nothing follows %%prec.\n" % (p.file,p.line))
        prec = Precedence.get(precname,None)
        sys.stderr.write("%s:%d: Nothing known about the precedence of '%s'\n" % (p.file,p.line,precname))
        if Terminals.has_key(t):
            Terminals[t].append(p.number)
            # Is a terminal. We'll assign a precedence to p based on this
            if not hasattr(p,"prec"):
                p.prec = Precedence.get(t,('right',0))
            if not Nonterminals.has_key(t):
                Nonterminals[t] = [ ]
            Nonterminals[t].append(p.number)
    if not hasattr(p,"prec"):
    # Set final length of productions
    p.prod = tuple(p.prod)
    # Calculate unique syms in the production
    # Add to the global productions list
    Prodnames[p.name].append(p)
    Prodnames[p.name] = [ p ]
704 # Given a raw rule function, this function rips out its doc string
705 # and adds rules to the grammar
    # NOTE(review): the enclosing 'def' header for this rule-registration
    # helper is elided from this excerpt.  It rips the doc string out of a
    # p_* rule function and feeds each production to add_production().
    line = f.func_code.co_firstlineno
    file = f.func_code.co_filename
    # Methods carry an implicit self, so they need one more argument.
    if isinstance(f,types.MethodType):
    if f.func_code.co_argcount > reqdargs:
        sys.stderr.write("%s:%d: Rule '%s' has too many arguments.\n" % (file,line,f.__name__))
    if f.func_code.co_argcount < reqdargs:
        sys.stderr.write("%s:%d: Rule '%s' requires an argument.\n" % (file,line,f.__name__))
    # Split the doc string into lines
    pstrings = f.__doc__.splitlines()
    # This is a continuation of a previous rule
    sys.stderr.write("%s:%d: Misplaced '|'.\n" % (file,dline))
    if assign != ':' and assign != '::=':
        sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline))
    e = add_production(f,file,dline,prodname,syms)
    # NOTE(review): the matching 'try:' for the next line is elided.
    except StandardError:
        sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps))
    sys.stderr.write("%s:%d: No documentation string specified in function '%s'\n" % (file,line,f.__name__))
766 # Cycle checking code (Michael Dyck)
def compute_reachable():
    # NOTE(review): the docstring delimiters were lost in this excerpt;
    # restored here around the original docstring text.
    """
    Find each symbol that can be reached from the start symbol.
    Print a warning for any nonterminals that can't be reached.
    (Unused terminals have already had their warning.)
    """
    # NOTE(review): the Reachable-dict initialization body is elided.
    for s in Terminals.keys() + Nonterminals.keys():
    mark_reachable_from( Productions[0].prod[0], Reachable )
    for s in Nonterminals.keys():
        sys.stderr.write("yacc: Symbol '%s' is unreachable.\n" % s)
def mark_reachable_from(s, Reachable):
    # NOTE(review): docstring delimiters restored; the already-visited
    # guard and the inner loop binding 'r' are elided from this excerpt.
    """
    Mark all symbols that are reachable from symbol s.
    """
    # We've already reached symbol s.
    for p in Prodnames.get(s,[]):
        mark_reachable_from(r, Reachable)
796 # -----------------------------------------------------------------------------
797 # compute_terminates()
799 # This function looks at the various parsing rules and tries to detect
800 # infinite recursion cycles (grammar rules where there is no possible way
801 # to derive a string of only terminals).
802 # -----------------------------------------------------------------------------
def compute_terminates():
    # NOTE(review): docstring delimiters restored; much of the fixed-point
    # loop (Terminates dict setup, 'some_change' tracking, loop bodies) is
    # elided from this excerpt.
    """
    Raise an error for any symbols that don't terminate.
    """
    for t in Terminals.keys():
    # Initialize to false:
    for n in Nonterminals.keys():
    # Then propagate termination until no change:
    for (n,pl) in Prodnames.items():
        # Nonterminal n terminates iff any of its productions terminates.
        # Production p terminates iff all of its rhs symbols terminate.
        if not Terminates[s]:
            # The symbol s does not terminate,
            # so production p does not terminate.
        # didn't break from the loop,
        # so every symbol s terminates
        # so production p terminates.
        # symbol n terminates!
        if not Terminates[n]:
        # Don't need to consider any more productions for this n.
    for (s,terminates) in Terminates.items():
        if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
            # s is used-but-not-defined, and we've already warned of that,
            # so it would be overkill to say that it's also non-terminating.
        sys.stderr.write("yacc: Infinite recursion detected for symbol '%s'.\n" % s)
864 # -----------------------------------------------------------------------------
865 # verify_productions()
867 # This function examines all of the supplied rules to see if they seem valid.
868 # -----------------------------------------------------------------------------
def verify_productions(cycle_check=1):
    """Sanity-check the collected grammar: undefined symbols, unused tokens
    and rules, and (optionally) non-terminating recursion.

    NOTE(review): elided excerpt -- the 'error' counter initialization, the
    per-production symbol loop binding 's', the unused_tok/unused_prod
    counters, the ks.sort() calls, and several conditionals are missing.
    """
    for p in Productions:
        if not Prodnames.has_key(s) and not Terminals.has_key(s) and s != 'error':
            sys.stderr.write("%s:%d: Symbol '%s' used, but not defined as a token or a rule.\n" % (p.file,p.line,s))
    # Now verify all of the tokens
    _vf.write("Unused terminals:\n\n")
    for s,v in Terminals.items():
        if s != 'error' and not v:
            sys.stderr.write("yacc: Warning. Token '%s' defined, but not used.\n" % s)
            if yaccdebug: _vf.write("   %s\n"% s)
    # Print out all of the productions
    _vf.write("\nGrammar\n\n")
    for i in range(1,len(Productions)):
        _vf.write("Rule %-5d %s\n" % (i, Productions[i]))
    # Verify the use of all productions
    for s,v in Nonterminals.items():
        sys.stderr.write("%s:%d: Warning. Rule '%s' defined, but not used.\n" % (p.file,p.line, s))
    sys.stderr.write("yacc: Warning. There is 1 unused token.\n")
    sys.stderr.write("yacc: Warning. There are %d unused tokens.\n" % unused_tok)
    sys.stderr.write("yacc: Warning. There is 1 unused rule.\n")
    sys.stderr.write("yacc: Warning. There are %d unused rules.\n" % unused_prod)
    _vf.write("\nTerminals, with rules where they appear\n\n")
    ks = Terminals.keys()
    _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Terminals[k]])))
    _vf.write("\nNonterminals, with rules where they appear\n\n")
    ks = Nonterminals.keys()
    _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Nonterminals[k]])))
    error += compute_terminates()
    # error += check_cycles()
933 # -----------------------------------------------------------------------------
936 # This function walks the list of productions and builds a complete set of the
937 # LR items. The LR items are stored in two ways: First, they are uniquely
938 # numbered and placed in the list _lritems. Second, a linked list of LR items
939 # is built for each production. For example:
945 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
946 # -----------------------------------------------------------------------------
    # NOTE(review): the enclosing 'def' header for the LR-item construction
    # routine is elided from this excerpt; 'lri' / 'lastlri' bindings and
    # the inner while loop are missing.  Each production's lr_next chain is
    # threaded here and every item is numbered into the global LRitems list.
    for p in Productions:
        lastlri.lr_next = lri
        lri.lr_num = len(LRitems)
    # In order for the rest of the parser generator to work, we need to
    # guarantee that no more lritems are generated. Therefore, we nuke
    # the p.lr_item method. (Only used in debugging)
    # Production.lr_item = None
967 # -----------------------------------------------------------------------------
970 # Given a list of precedence rules, add to the precedence table.
971 # -----------------------------------------------------------------------------
def add_precedence(plist):
    """Install the user's precedence declarations into the Precedence table.

    NOTE(review): elided excerpt -- the loop over plist binding
    'prec'/'terms'/'plevel'/'t', and the try/except wrapping the whole
    body, are missing here.
    """
    if prec != 'left' and prec != 'right' and prec != 'nonassoc':
        sys.stderr.write("yacc: Invalid precedence '%s'\n" % prec)
    if Precedence.has_key(t):
        sys.stderr.write("yacc: Precedence already specified for terminal '%s'\n" % t)
    # Each terminal maps to (associativity, level).
    Precedence[t] = (prec,plevel)
    sys.stderr.write("yacc: Invalid precedence table.\n")
996 # -----------------------------------------------------------------------------
999 # Compute the augmented grammar. This is just a rule S' -> start where start
1000 # is the starting symbol.
1001 # -----------------------------------------------------------------------------
def augment_grammar(start=None):
    """Install the augmented rule S' -> start as production 0."""
    # NOTE(review): the 'if not start:' guard before this default is elided
    # from this excerpt.
    start = Productions[1].name
    Productions[0] = Production(name="S'",prod=[start],number=0,len=1,prec=('right',0),func=None)
    Productions[0].usyms = [ start ]
    Nonterminals[start].append(0)
1011 # -------------------------------------------------------------------------
1014 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1016 # During execution of compute_first1, the result may be incomplete.
1017 # Afterward (e.g., when called from compute_follow()), it will be complete.
1018 # -------------------------------------------------------------------------
    # NOTE(review): the enclosing 'def first(beta):' header, the 'result'
    # initialization, and the loop headers over beta / First[x] are elided
    # from this excerpt.
    # We are computing First(x1,x2,x3,...,xn)
    x_produces_empty = 0
    # Add all the non-<empty> symbols of First[x] to the result.
    x_produces_empty = 1
    if f not in result: result.append(f)
    if x_produces_empty:
        # We have to consider the next x in beta,
        # i.e. stay in the loop.
    # We don't have to consider any further symbols in beta.
    # There was no 'break' from the loop,
    # so x_produces_empty was true for all x in beta,
    # so beta produces empty as well.
    result.append('<empty>')
1050 # Given a non-terminal. This function computes the set of all symbols
1051 # that might follow it. Dragon book, p. 189.
def compute_follow(start=None):
    """Compute FOLLOW(B) for every nonterminal B (Dragon book, p. 189).

    NOTE(review): elided excerpt -- the Follow-dict initialization body,
    the 'if not start:' guard, the outer fixed-point while loop, the
    binding of 'B', 'hasempty', and 'didadd', and the appends into
    Follow[B] are missing here.
    """
    # Add '$' to the follow list of the start symbol
    for k in Nonterminals.keys():
    start = Productions[1].name
    Follow[start] = [ '$' ]
    for p in Productions[1:]:
        # Here is the production set
        for i in range(len(p.prod)):
            if Nonterminals.has_key(B):
                # Okay. We got a non-terminal in a production
                fst = first(p.prod[i+1:])
                if f != '<empty>' and f not in Follow[B]:
                if hasempty or i == (len(p.prod)-1):
                    # Add elements of follow(a) to follow(b)
                    for f in Follow[p.name]:
                        if f not in Follow[B]:
        if not didadd: break
    _vf.write('\nFollow:\n')
    for k in Nonterminals.keys():
        _vf.write("%-20s : %s\n" % (k, " ".join([str(s) for s in Follow[k]])))
1092 # -------------------------------------------------------------------------
1095 # Compute the value of FIRST1(X) for all symbols
1096 # -------------------------------------------------------------------------
def compute_first1():
    """Compute FIRST(X) for all grammar symbols into the global First dict.

    Terminals seed their own FIRST sets; nonterminals are filled by
    fixed-point propagation.  NOTE(review): elided excerpt -- the terminal
    seeding body, the empty-set init body, and the 'some_change' loop
    machinery are missing here.
    """
    for t in Terminals.keys():
    First['#'] = ['#'] # what's this for?
    # Initialize to the empty set:
    for n in Nonterminals.keys():
    # Then propagate symbols until no change:
    for n in Nonterminals.keys():
        for p in Prodnames[n]:
            for f in first(p.prod):
                if f not in First[n]:
                    First[n].append( f )
    _vf.write('\nFirst:\n')
    for k in Nonterminals.keys():
        _vf.write("%-20s : %s\n" %
            (k, " ".join([str(s) for s in First[k]])))
1130 # -----------------------------------------------------------------------------
1131 # === SLR Generation ===
1133 # The following functions are used to construct SLR (Simple LR) parsing tables
1134 # as described on p.221-229 of the dragon book.
1135 # -----------------------------------------------------------------------------
1137 # Global variables for the LR parsing engine
    # NOTE(review): the enclosing 'def' header for this LR-engine state
    # initializer is elided from this excerpt.
    global _lr_action, _lr_goto, _lr_method
    global _lr_goto_cache

    _lr_action = { }        # Action table
    _lr_goto = { }          # Goto table
    _lr_method = "Unknown"  # LR method used
    _lr_goto_cache = { }    # Memoizes lr0_goto() results keyed on (id(I), x)
1147 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1148 # prodlist is a list of productions.
_add_count = 0    # Counter used to detect cycles

    # NOTE(review): the enclosing 'def lr0_closure(I):' header, the
    # increment of _add_count, the worklist loop, and the appends into the
    # result set J are elided from this excerpt.  The lr0_added stamp lets
    # membership be tested without per-call flag resets.
    prodlist = Productions
    # Add everything in I to J
    if x.lr0_added == _add_count: continue
    x.lr0_added = _add_count
1173 # Compute the LR(0) goto function goto(I,X) where I is a set
1174 # of LR(0) items and X is a grammar symbol. This function is written
1175 # in a way that guarantees uniqueness of the generated goto sets
1176 # (i.e. the same goto set will never be returned as two different Python
1177 # objects). With uniqueness, we can later do fast set comparisons using
1178 # id(obj) instead of element-wise comparison.
    # NOTE(review): the enclosing 'def lr0_goto(I,x):' header, the early
    # cache-hit return, the loop binding 'p'/'n', and the set-building tail
    # are elided from this excerpt.  Uniqueness of returned goto sets lets
    # callers compare states via id() instead of element-wise.
    # First we look for a previously cached entry
    g = _lr_goto_cache.get((id(I),x),None)
    # Now we generate the goto set in a way that guarantees uniqueness
    s = _lr_goto_cache.get(x,None)
    _lr_goto_cache[x] = s
    if n and n.lrbefore == x:
        s1 = s.get(id(n),None)
    _lr_goto_cache[(id(I),x)] = g
1215 # Given a setnumber of an lr0 state and a symbol return the setnumber of the goto state
def lr0_goto_setnumber(I_setnumber, x):
    """Return the set number of goto(I, x) given I's set number.

    Results are memoized in GotoSetNum.  NOTE(review): elided excerpt --
    the early return on cache hit, the 'gsetlen' binding, the item loop
    binding 'item'/'inall', and the 'setnumber = i' assignment are missing
    here.
    """
    if GotoSetNum.has_key((I_setnumber, x)):
        setnumber = GotoSetNum[(I_setnumber, x)]
    gset = lr0_goto(Canonical[I_setnumber], x)
    # Search the canonical collection for an item set matching gset by
    # comparing setnumber membership columns.
    for i in xrange(len(gset[0].setnumbers)):
        if not item.setnumbers[i]:
        if inall and len(Canonical[i]) == gsetlen:
            break # Note: DB. I added this to improve performance.
                  # Not sure if this breaks the algorithm (it doesn't appear to).
    GotoSetNum[(I_setnumber, x)] = setnumber
# Compute the kernel of a set of LR(0) items
# NOTE(review): the enclosing 'def' header and the append into the kernel
# list are elided; kernel items are the start item S' plus any item whose
# dot has advanced (or whose production is empty).
    if p.name == "S'" or p.lr_index > 0 or p.len == 0:
# Compute the LR(0) sets of item function
# NOTE(review): the enclosing 'def' header, the enumeration loop binding
# 'I'/'i', the symbol-collection body filling 'asyms', and the append of
# new states onto C are elided from this excerpt.
    C = [ lr0_closure([Productions[0].lr_next]) ]
    _lr0_cidhash[id(I)] = i
    # Loop over the items in C and each grammar symbols
    # Collect all of the symbols that could possibly be in the goto(I,X) sets
    for x in asyms.keys():
        # Skip goto sets we have already numbered.
        if _lr0_cidhash.has_key(id(g)): continue
        _lr0_cidhash[id(g)] = len(C)
1284 # -----------------------------------------------------------------------------
1287 # This function constructs an SLR table.
1288 # -----------------------------------------------------------------------------
# NOTE(review): gappy excerpt — control-flow lines (try/for/else headers and
# several assignments) are missing between the numbered lines below, so the
# nesting visible here is flattened. Builds the global _lr_action/_lr_goto
# tables from the LR(0) item sets, resolving conflicts with Precedence and
# (for reduce actions) the SLR Follow sets.
1289 def slr_parse_table():
1291 goto = _lr_goto # Goto array
1292 action = _lr_action # Action array
1293 actionp = { } # Action production array (temporary)
1301 sys.stderr.write("yacc: Generating SLR parsing table...\n")
1302 _vf.write("\n\nParsing method: SLR\n\n")
1304 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
1305 # This determines the number of states
1309 # Build the parser table, state by state
1312 # Loop over each production in I
1313 actlist = [ ] # List of actions
1316 _vf.write("\nstate %d\n\n" % st)
1318 _vf.write(" (%d) %s\n" % (p.number, str(p)))
# Dot at the end of the production: either accept (start symbol) or reduce.
1323 if p.prod[-1] == ".":
1325 # Start symbol. Accept!
1329 # We are at the end of a production. Reduce!
# SLR rule: reduce on every terminal in Follow(lhs).
1330 for a in Follow[p.name]:
1331 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
1332 r = action.get((st,a),None)
1334 # Whoa. Have a shift/reduce or reduce/reduce conflict
1336 # Need to decide on shift or reduce here
1337 # By default we favor shifting. Need to add
1338 # some precedence rules here.
# sprec/slevel: precedence of the production already holding the slot;
# rprec/rlevel: precedence of the lookahead terminal (default right, 0).
1339 sprec,slevel = Productions[actionp[st,a].number].prec
1340 rprec,rlevel = Precedence.get(a,('right',0))
1341 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
1342 # We really need to reduce here.
# Negative action entries encode "reduce by rule |n|".
1343 action[st,a] = -p.number
1345 if not slevel and not rlevel:
1346 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1347 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1349 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1352 # Hmmm. Guess we'll keep the shift
1353 if not slevel and not rlevel:
1354 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1355 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1358 # Reduce/reduce conflict. In this case, we favor the rule
1359 # that was defined first in the grammar file
1360 oldp = Productions[-r]
1361 pp = Productions[p.number]
1362 if oldp.line > pp.line:
1363 action[st,a] = -p.number
1365 # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
1367 _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
1368 _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
1370 sys.stderr.write("Unknown conflict in state %d\n" % st)
1372 action[st,a] = -p.number
# Dot not at the end: a shift on the terminal following the dot.
1376 a = p.prod[i+1] # Get symbol right after the "."
1377 if Terminals.has_key(a):
1379 j = _lr0_cidhash.get(id(g),-1)
1381 # We are in a shift state
1382 actlist.append((a,p,"shift and go to state %d" % j))
1383 r = action.get((st,a),None)
1385 # Whoa have a shift/reduce or shift/shift conflict
1388 sys.stderr.write("Shift/shift conflict in state %d\n" % st)
1390 # Do a precedence check.
1391 # - if precedence of reduce rule is higher, we reduce.
1392 # - if precedence of reduce is same and left assoc, we reduce.
1393 # - otherwise we shift
# Mirror of the case above, but here the existing entry is a reduce and the
# new candidate is a shift (note sprec/rprec sources are swapped).
1394 rprec,rlevel = Productions[actionp[st,a].number].prec
1395 sprec,slevel = Precedence.get(a,('right',0))
1396 if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
1397 # We decide to shift here... highest precedence to shift
1400 if not slevel and not rlevel:
1402 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1403 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1404 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1407 # Hmmm. Guess we'll keep the reduce
1408 if not slevel and not rlevel:
1410 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1411 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1414 sys.stderr.write("Unknown conflict in state %d\n" % st)
1419 except StandardError,e:
1420 raise YaccError, "Hosed in slr_parse_table", e
1422 # Print the actions associated with each terminal
1425 for a,p,m in actlist:
1426 if action.has_key((st,a)):
1427 if p is actionp[st,a]:
1428 _vf.write(" %-15s %s\n" % (a,m))
1429 _actprint[(a,m)] = 1
# Second pass: report actions that were considered but NOT chosen ("!" lines).
1431 for a,p,m in actlist:
1432 if action.has_key((st,a)):
1433 if p is not actionp[st,a]:
1434 if not _actprint.has_key((a,m)):
1435 _vf.write(" ! %-15s [ %s ]\n" % (a,m))
1436 _actprint[(a,m)] = 1
1438 # Construct the goto table for this state
1444 if Nonterminals.has_key(s):
1446 for n in nkeys.keys():
1448 j = _lr0_cidhash.get(id(g),-1)
1452 _vf.write(" %-30s shift and go to state %d\n" % (n,j))
# Summary conflict counts on stderr (singular vs plural message forms).
1457 if n_srconflict == 1:
1458 sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
1459 if n_srconflict > 1:
1460 sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
1461 if n_rrconflict == 1:
1462 sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
1463 if n_rrconflict > 1:
1464 sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)
1468 # -----------------------------------------------------------------------------
1469 # ==== LALR(1) Parsing ====
1470 # FINISHED! 5/20/2003 by Elias Ioup
1471 # -----------------------------------------------------------------------------
1474 # Compute the lr1_closure of a set I. I is a list of productions; setnumber
1475 # is the LR(0) state from which the resulting LR(1) items should originate.
# NOTE(review): gappy excerpt — the loop headers and the J/Jhash setup lines
# are missing, so the nesting shown here is flattened.
1479 def lr1_closure(I, setnumber = 0):
1484 prodlist = Productions
1486 # Add everything in I to J
1497 jlr_index = j.lr_index
# jprodslice is the tail of the production after the symbol following the
# dot — the "X" in [A -> a.BX, a], used to build FIRST(X a) below.
1498 jprodslice = jprod[jlr_index+2:]
1500 if jlr_index < len(jprod) - 1 and Nonterminals.has_key(jprod[jlr_index+1]):
# lk_added[setnumber] tracks how many lookaheads were already processed for
# this state, so only newly-added lookaheads are expanded on each pass.
1503 if j.lk_added.setdefault(setnumber, 0) < len(j.lookaheads[setnumber]):
1504 for a in j.lookaheads[setnumber][j.lk_added[setnumber]:]:
1505 # find b in FIRST(Xa) if j = [A->a.BX,a]
1506 temp_first_syms = first(jprodslice + (a,))
1507 for x in temp_first_syms:
1508 if x not in first_syms:
1509 first_syms.append(x)
1511 j.lk_added[setnumber] = len(j.lookaheads[setnumber])
# Merge the computed FIRST symbols into the lookahead set of the item with
# the dot advanced (x.lr_next), creating the set if absent.
1516 if x.lr_next.lookaheads.has_key(setnumber):
1517 _xlook = x.lr_next.lookaheads[setnumber]
1518 for s in first_syms:
1523 x.lr_next.lookaheads[setnumber] = first_syms
# Jhash presumably de-duplicates items added to the closure J — TODO confirm
# against the full source (its initialization is in a missing line).
1527 if not Jhash.has_key(nid):
# NOTE(review): gappy excerpt of the LALR(1) lookahead-determination pass
# (spontaneous generation + propagation, cf. the Dragon Book's algorithm);
# loop headers and the spontaneous/propogate list initializations are in
# missing lines.
1533 def add_lookaheads(K):
1537 for setnumber in range(len(K)):
1538 for kitem in K[setnumber]:
# '#' is the dummy lookahead symbol used to detect propagation.
1539 kitem.lookaheads[setnumber] = ['#']
1540 J = lr1_closure([kitem], setnumber)
1542 # find the lookaheads that are spontaneously created from closures
1543 # and the propagations of lookaheads between lr items
1545 if item.lr_index < len(item.prod)-1:
1546 for lookahead in item.lookaheads[setnumber]:
1547 goto_setnumber = lr0_goto_setnumber(setnumber, item.prod[item.lr_index+1])
# A real (non-'#') lookahead is generated spontaneously for the goto item.
1549 if lookahead != '#':
1550 if item.lr_next in K[goto_setnumber]:
1553 spontaneous.append((next, (lookahead, goto_setnumber)))
# The dummy '#' surviving means lookaheads propagate from (kitem, setnumber)
# to the goto item.
1555 if goto_setnumber > -1:
1556 if item.lr_next in K[goto_setnumber]:
1560 propogate.append(((kitem, setnumber), (next, goto_setnumber)))
# Reset all lookahead sets, then seed them with the spontaneous ones.
1564 for x in K[setnumber]:
1565 x.lookaheads[setnumber] = []
1567 for x in spontaneous:
1568 if x[1][0] not in x[0].lookaheads[x[1][1]]:
1569 x[0].lookaheads[x[1][1]].append(x[1][0])
# The augmented start item always has end-of-input '$' as its lookahead.
1571 K[0][0].lookaheads[0] = ['$']
# Index propagation edges by source item for the fixpoint loop below.
1575 if pitems.has_key(x[0]):
1576 pitems[x[0]].append(x[1])
1579 pitems[x[0]].append(x[1])
1581 # propagate the lookaheads that were spontaneously generated
1582 # based on the propagations produced above
1590 pkey = (item, kindex)
1591 if pitems.has_key(pkey):
1592 for propogation in pitems[pkey]:
1593 gitem = propogation[0]
1594 gsetnumber = propogation[1]
1595 glookaheads = gitem.lookaheads[gsetnumber]
1596 for lookahead in item.lookaheads[kindex]:
1597 if lookahead not in glookaheads:
1598 glookaheads.append(lookahead)
# NOTE(review): gappy excerpt. Populates the global TReductions and
# NTReductions tables for every nonterminal, using the two helpers below.
1602 def ReduceNonterminals():
# Initialize both tables with empty lists for every nonterminal.
1608 for nt in Nonterminals.keys():
1609 TReductions[nt] = []
1610 NTReductions[nt] = []
1612 for nt in Nonterminals.keys():
1613 terms = ReduceToTerminals(nt)
1614 TReductions[nt].extend(terms)
# ReduceToNonterminals fills NTReductions[nt] itself (and memoizes), so it
# is only invoked when the entry has not been computed yet.
1615 if not NTReductions.has_key(nt):
1616 ReduceToNonterminals(nt)
# NOTE(review): gappy excerpt. Returns the de-duplicated list of terminals
# that can begin some derivation of nonterminal `nt`, by walking the first
# symbol of each of its productions (recursing through nonterminals).
1620 def ReduceToTerminals(nt):
1623 reducedterminals = []
1625 for p in Prodnames[nt]:
# First symbol is a terminal: record it directly.
1627 if Terminals.has_key(p.prod[0]):
1628 if p.prod[0] not in reducedterminals:
1629 reducedterminals.append(p.prod[0])
# Otherwise recurse into the leading nonterminal and merge its terminals.
# NOTE(review): no visible cycle guard in this excerpt — presumably handled
# in the missing lines; TODO confirm against the full source.
1632 terms = ReduceToTerminals(p.prod[0])
1634 if t not in reducedterminals:
1635 reducedterminals.append(t)
1637 return reducedterminals
# NOTE(review): gappy excerpt. Computes the nonterminals reachable as the
# first symbol of derivations of `nt` and memoizes the result in the global
# NTReductions table (rather than returning it).
1640 def ReduceToNonterminals(nt):
1644 reducednonterminals = []
1646 for p in Prodnames[nt]:
1648 if Nonterminals.has_key(p.prod[0]):
1649 if p.prod[0] not in reducednonterminals:
1650 reducednonterminals.append(p.prod[0])
# Recurse (with memoization) to include the leading nonterminal's own set.
1652 if not NTReductions.has_key(p.prod[0]):
1653 ReduceToNonterminals(p.prod[0])
1655 nterms = NTReductions[p.prod[0]]
1657 if nt not in reducednonterminals:
1658 reducednonterminals.append(nt)
1661 NTReductions[nt] = reducednonterminals
1663 # -----------------------------------------------------------------------------
1664 # lalr_parse_table()
1666 # This function constructs an LALR table.
1667 # -----------------------------------------------------------------------------
# NOTE(review): gappy excerpt — try/for/else headers and many assignments are
# in missing lines, so the nesting shown here is flattened. Structure mirrors
# slr_parse_table() but reduces on the LALR(1) lookahead sets computed by
# add_lookaheads() instead of the SLR Follow sets, and adds special handling
# for empty productions via EmptyAncestors.
1668 def lalr_parse_table():
1670 goto = _lr_goto # Goto array
1671 action = _lr_action # Action array
1672 actionp = { } # Action production array (temporary)
# Local aliases of the module-level caches (faster lookups in the hot loops).
1673 goto_cache = _lr_goto_cache
1674 cid_hash = _lr0_cidhash
1682 sys.stderr.write("yacc: Generating LALR(1) parsing table...\n")
1683 _vf.write("\n\nParsing method: LALR(1)\n\n")
1685 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
1686 # This determines the number of states
1694 # Create the kernel states.
1699 K.append(lr0_kernel(x))
1701 y.setnumbers = setC[:]
# Give every kernel item a per-state lookahead slot and mark its membership.
1706 y.lookaheads[_cindex] = []
1707 y.setnumbers[_cindex] = 1
1708 _cindex = _cindex + 1
1711 # Add lookaheads to the lr items
1717 # Do the reductions for parsing first and keep them in globals
1720 ReduceNonterminals()
# EmptyAncestors maps each nullable nonterminal to the nonterminals that can
# derive it (used for reducing empty productions below).
1727 for y in Prodempty.keys():
1728 EmptyAncestors[y] = []
1729 for x in NTReductions.items():
1731 if Prodempty.has_key(y):
1732 EmptyAncestors[y].append(x[0])
1735 # Build the parser table, state by state
1738 # Loop over each production in I
1739 actlist = [ ] # List of actions
1745 _vf.write("\nstate %d\n\n" % st)
1747 _vf.write(" (%d) %s\n" % (p.number, str(p)))
1753 if p.prod[-1] == ".":
1755 # Start symbol. Accept!
# Empty production: derive its reduce lookaheads from the productions of its
# ancestor nonterminals (the usual lookahead sets don't apply directly).
1758 elif len(p.prod) == 0:
1759 ancestors = EmptyAncestors[p.name]
1764 plist = Productions[i.name]
1766 if len(x.prod) > 0 and x.prod[0] == p.name:
1768 d = x.prod[lr_index+2:]
1769 for l in x.lookaheads.items():
1770 flist = First[tuple(n+d+[l])]
1772 if f not in input_list and f in p.lookaheads[st]:
1773 input_list.append(f)
1775 # We are at the end of a production. Reduce!
1776 #print "input_list: %s" % input_list
1777 #print "Follow[p.name]: %s" % Follow[p.name]
1778 for a in input_list:
1779 actlist.append((a,p,"reduce using rule %d (%s) " % (p.number,p)))
1780 r = action.get((st,a),None)
1782 # Whoa. Have a shift/reduce or reduce/reduce conflict
1784 # Need to decide on shift or reduce here
1785 # By default we favor shifting. Need to add
1786 # some precedence rules here.
1787 sprec,slevel = Productions[actionp[st,a].number].prec
1788 rprec,rlevel = Precedence.get(a,('right',0))
1789 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
1790 # We really need to reduce here.
# Negative action entries encode "reduce by rule |n|".
1791 action[st,a] = -p.number
1793 if not slevel and not rlevel:
1794 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1795 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1797 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1800 # Hmmm. Guess we'll keep the shift
1801 if not slevel and not rlevel:
1802 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1803 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1806 # Reduce/reduce conflict. In this case, we favor the rule
1807 # that was defined first in the grammar file
1808 oldp = Productions[-r]
1809 pp = Productions[p.number]
1810 if oldp.line > pp.line:
1811 action[st,a] = -p.number
1813 # print "Reduce/reduce conflict in state %d" % st
1815 _vfc.write("reduce/reduce conflict in state %d resolved using rule %d.\n" % (st, actionp[st,a].number))
1816 _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d.\n" % (a,actionp[st,a].number))
1818 sys.stderr.write("Unknown conflict in state %d\n" % st)
1820 action[st,a] = -p.number
1823 break # break out of the for s in K loop because we only want to make
1824 # sure that a production is in the Kernel
1827 # We are at the end of a production. Reduce!
# Non-empty completed item: reduce on the LALR(1) lookaheads for this state.
1829 for a in p.lookaheads[st]:
1830 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
1831 r = action.get((st,a),None)
1833 # Whoa. Have a shift/reduce or reduce/reduce conflict
1835 # Need to decide on shift or reduce here
1836 # By default we favor shifting. Need to add
1837 # some precedence rules here.
1838 sprec,slevel = Productions[actionp[st,a].number].prec
1839 rprec,rlevel = Precedence.get(a,('right',0))
1840 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
1841 # We really need to reduce here.
1842 action[st,a] = -p.number
1844 if not slevel and not rlevel:
1845 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1846 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1848 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1851 # Hmmm. Guess we'll keep the shift
1852 if not slevel and not rlevel:
1853 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1854 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1857 # Reduce/reduce conflict. In this case, we favor the rule
1858 # that was defined first in the grammar file
1859 oldp = Productions[-r]
1860 pp = Productions[p.number]
1861 if oldp.line > pp.line:
1862 action[st,a] = -p.number
1864 # print "Reduce/reduce conflict in state %d" % st
1866 _vfc.write("reduce/reduce conflict in state %d resolved using rule %d.\n" % (st, actionp[st,a].number))
1867 _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d.\n" % (a,actionp[st,a].number))
# NOTE(review): inconsistent with the sibling branches, which report unknown
# conflicts via sys.stderr.write — this one uses a bare print statement.
1869 print "Unknown conflict in state %d" % st
1871 action[st,a] = -p.number
# Dot not at end: shift on the terminal after the dot.
1875 a = p.prod[i+1] # Get symbol right after the "."
1876 if Terminals.has_key(a):
1877 g = goto_cache[(idI,a)]
1878 j = cid_hash.get(id(g),-1)
1880 # We are in a shift state
# acthash de-duplicates actlist entries (performance hack, cf. line 1929).
1882 if not acthash.has_key(_k):
1883 actlist.append((a,p,"shift and go to state %d" % j))
1885 r = action.get((st,a),None)
1887 # Whoa have a shift/reduce or shift/shift conflict
1890 sys.stderr.write("Shift/shift conflict in state %d\n" % st)
1892 # Do a precedence check.
1893 # - if precedence of reduce rule is higher, we reduce.
1894 # - if precedence of reduce is same and left assoc, we reduce.
1895 # - otherwise we shift
1896 rprec,rlevel = Productions[actionp[st,a].number].prec
1897 sprec,slevel = Precedence.get(a,('right',0))
1898 if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
1899 # We decide to shift here... highest precedence to shift
1902 if not slevel and not rlevel:
1904 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1905 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1906 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1909 # Hmmm. Guess we'll keep the reduce
1910 if not slevel and not rlevel:
1912 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1913 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1916 sys.stderr.write("Unknown conflict in state %d\n" % st)
# Nonterminal after the dot: also add shifts for the terminals it can start
# with (TReductions), per the LALR table construction used here.
1922 term_list = TReductions[nonterminal]
1923 # DB: This loop gets executed a lot. Try to optimize
1925 g = goto_cache[(idI,a)]
1928 # We are in a shift state
1929 # Don't put repeated shift actions on action list (performance hack)
1931 if not acthash.has_key(_k):
1932 actlist.append((a,p,"shift and go to state "+str(j)))
1935 r = action.get((st,a),None)
1937 # Whoa have a shift/reduce or shift/shift conflict
1940 sys.stderr.write("Shift/shift conflict in state %d\n" % st)
1943 # Do a precedence check.
1944 # - if precedence of reduce rule is higher, we reduce.
1945 # - if precedence of reduce is same and left assoc, we reduce.
1946 # - otherwise we shift
1947 rprec,rlevel = Productions[actionp[st,a].number].prec
1948 sprec,slevel = Precedence.get(a,('right',0))
1949 if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
1950 # We decide to shift here... highest precedence to shift
1953 if not slevel and not rlevel:
1955 _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
1956 _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
1957 elif (slevel == rlevel) and (rprec == 'nonassoc'):
1960 # Hmmm. Guess we'll keep the reduce
1961 if not slevel and not rlevel:
1963 _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
1964 _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
1967 sys.stderr.write("Unknown conflict in state %d\n" % st)
1972 except StandardError,e:
1973 raise YaccError, "Hosed in lalr_parse_table", e
1975 # Print the actions associated with each terminal
1977 for a,p,m in actlist:
1978 if action.has_key((st,a)):
1979 if p is actionp[st,a]:
1980 _vf.write(" %-15s %s\n" % (a,m))
# Report considered-but-rejected actions ("!" lines) in the debug output.
1983 for a,p,m in actlist:
1984 if action.has_key((st,a)):
1985 if p is not actionp[st,a]:
1986 _vf.write(" ! %-15s [ %s ]\n" % (a,m))
1988 # Construct the goto table for this state
1992 if Nonterminals.has_key(s):
1995 # Construct the goto table for this state
1996 for n in nkeys.keys():
1998 j = cid_hash.get(id(g),-1)
2002 _vf.write(" %-30s shift and go to state %d\n" % (n,j))
# Summary conflict counts on stderr (singular vs plural message forms).
2006 if n_srconflict == 1:
2007 sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
2008 if n_srconflict > 1:
2009 sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
2010 if n_rrconflict == 1:
2011 sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
2012 if n_rrconflict > 1:
2013 sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)
2016 # -----------------------------------------------------------------------------
2017 # ==== LR Utility functions ====
2018 # -----------------------------------------------------------------------------
2020 # -----------------------------------------------------------------------------
2021 # _lr_write_tables()
2023 # This function writes the LR parsing tables to a file
2024 # -----------------------------------------------------------------------------
# NOTE(review): gappy excerpt — the try header, the "smaller" flag, and the
# bodies of the factoring loops are in missing lines. Emits a parsetab-style
# Python module containing _lr_action, _lr_goto and _lr_productions, in a
# compressed ("factored") form when enabled.
2026 def lr_write_tables(modulename=tab_module,outputdir=''):
2027 filename = os.path.join(outputdir,modulename) + ".py"
2029 f = open(filename,"w")
# The next line is part of the generated file's text, not a comment on this
# function — it is emitted into the output module verbatim.
2033 # This file is automatically generated. Do not edit.
2038 """ % (filename, repr(_lr_method), repr(Signature.digest())))
2040 # Change smaller to 0 to go back to original tables
2043 # Factor out names to try and make smaller
# Compressed form: group action entries by terminal; the emitted module
# rebuilds the flat (state, terminal) -> action dict at import time (below).
2047 for k,v in _lr_action.items():
2055 f.write("\n_lr_action_items = {")
2056 for k,v in items.items():
2057 f.write("%r:([" % k)
# These writes emit the reconstruction loop that runs inside parsetab.py.
2069 for _k, _v in _lr_action_items.items():
2070 for _x,_y in zip(_v[0],_v[1]):
2071 _lr_action[(_x,_k)] = _y
2072 del _lr_action_items
# Uncompressed fallback: dump the action table literally.
2076 f.write("\n_lr_action = { ");
2077 for k,v in _lr_action.items():
2078 f.write("(%r,%r):%r," % (k[0],k[1],v))
2082 # Factor out names to try and make smaller
# Same two-form scheme for the goto table.
2085 for k,v in _lr_goto.items():
2093 f.write("\n_lr_goto_items = {")
2094 for k,v in items.items():
2095 f.write("%r:([" % k)
2107 for _k, _v in _lr_goto_items.items():
2108 for _x,_y in zip(_v[0],_v[1]):
2109 _lr_goto[(_x,_k)] = _y
2113 f.write("\n_lr_goto = { ");
2114 for k,v in _lr_goto.items():
2115 f.write("(%r,%r):%r," % (k[0],k[1],v))
2118 # Write production table
2119 f.write("_lr_productions = [\n")
2120 for p in Productions:
# Real productions carry their action function's name/file/line; placeholder
# entries get None fields.
2123 f.write(" (%r,%d,%r,%r,%d),\n" % (p.name, p.len, p.func.__name__,p.file,p.line))
2125 f.write(" (%r,%d,None,None,None),\n" % (p.name, p.len))
# Best-effort: failure to write the table file is reported, not fatal.
2132 print "Unable to create '%s'" % filename
# NOTE(review): gappy excerpt — the try header and the return statements are
# in missing lines. Imports a previously generated parsetab module and, when
# the stored signature matches the current grammar (or optimize is set),
# installs its tables into the module-level globals.
2136 def lr_read_tables(module=tab_module,optimize=0):
2137 global _lr_action, _lr_goto, _lr_productions, _lr_method
# Dynamic import of the table module by name (Python 2 exec statement).
2139 exec "import %s as parsetab" % module
# In optimized mode the signature check is skipped entirely.
2141 if (optimize) or (Signature.digest() == parsetab._lr_signature):
2142 _lr_action = parsetab._lr_action
2143 _lr_goto = parsetab._lr_goto
2144 _lr_productions = parsetab._lr_productions
2145 _lr_method = parsetab._lr_method
# Missing or stale table module is not an error — tables get regenerated.
2150 except (ImportError,AttributeError):
2153 # -----------------------------------------------------------------------------
2156 # Build the parser module
2157 # -----------------------------------------------------------------------------
# NOTE(review): gappy excerpt of the main entry point — many control-flow
# lines are missing. Collects the grammar (tokens, precedence, p_* rules,
# p_error) from `module` or the caller's globals, builds or reloads the LR
# tables, writes parser.out debug output, and returns a Parser object.
2159 def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module, start=None, check_recursion=1, optimize=0,write_tables=1,debugfile=debug_file,outputdir=''):
2167 # Add starting symbol to signature
2169 Signature.update(start)
2171 # Add parsing method to signature
2172 Signature.update(method)
2174 # If a "module" parameter was supplied, extract its dictionary.
2175 # Note: a module may in fact be an instance as well.
2178 # User supplied a module object.
2179 if isinstance(module, types.ModuleType):
2180 ldict = module.__dict__
2181 elif isinstance(module, types.InstanceType):
2182 _items = [(k,getattr(module,k)) for k in dir(module)]
2187 raise ValueError,"Expected a module"
2190 # No module given. We might be able to get information from the caller.
2191 # Throw an exception and unwind the traceback to get the globals
2195 except RuntimeError:
2196 e,b,t = sys.exc_info()
2198 f = f.f_back # Walk out to our calling function
2199 ldict = f.f_globals # Grab its globals dictionary
2201 # If running in optimized mode. We're going to
# Optimized fast path: reuse pre-generated tables, rebuilding only the
# minimal Production stand-ins needed at parse time.
2203 if (optimize and lr_read_tables(tabmodule,1)):
2206 for p in _lr_productions:
2208 Productions.append(None)
2210 m = MiniProduction()
2216 m.func = ldict[p[2]]
2217 Productions.append(m)
2220 # Get the tokens map
2221 if (module and isinstance(module,types.InstanceType)):
2222 tokens = getattr(module,"tokens",None)
2224 tokens = ldict.get("tokens",None)
2227 raise YaccError,"module does not define a list 'tokens'"
2228 if not (isinstance(tokens,types.ListType) or isinstance(tokens,types.TupleType)):
2229 raise YaccError,"tokens must be a list or tuple."
2231 # Check to see if a requires dictionary is defined.
2232 requires = ldict.get("require",None)
2234 if not (isinstance(requires,types.DictType)):
2235 raise YaccError,"require must be a dictionary."
2237 for r,v in requires.items():
2239 if not (isinstance(v,types.ListType)):
2241 v1 = [x.split(".") for x in v]
2243 except StandardError:
2244 print "Invalid specification for rule '%s' in require. Expected a list of strings" % r
2247 # Build the dictionary of terminals. We record a 0 in the
2248 # dictionary to track whether or not a terminal is actually
2249 # used in the grammar
2251 if 'error' in tokens:
2252 print "yacc: Illegal token 'error'. Is a reserved word."
2253 raise YaccError,"Illegal token name"
2256 if Terminals.has_key(n):
2257 print "yacc: Warning. Token '%s' multiply defined." % n
2260 Terminals['error'] = [ ]
2262 # Get the precedence map (if any)
2263 prec = ldict.get("precedence",None)
2265 if not (isinstance(prec,types.ListType) or isinstance(prec,types.TupleType)):
2266 raise YaccError,"precedence must be a list or tuple."
2267 add_precedence(prec)
2268 Signature.update(repr(prec))
2271 if not Precedence.has_key(n):
2272 Precedence[n] = ('right',0) # Default, right associative, 0 precedence
2274 # Look for error handler
2275 ef = ldict.get('p_error',None)
2277 if isinstance(ef,types.FunctionType):
2279 elif isinstance(ef, types.MethodType):
2282 raise YaccError,"'p_error' defined, but is not a function or method."
2283 eline = ef.func_code.co_firstlineno
2284 efile = ef.func_code.co_filename
# p_error takes one argument; bound methods get +1 for self.
2287 if (ef.func_code.co_argcount != 1+ismethod):
2288 raise YaccError,"%s:%d: p_error() requires 1 argument." % (efile,eline)
2292 print "yacc: Warning. no p_error() function is defined."
2294 # Get the list of built-in functions with p_ prefix
2295 symbols = [ldict[f] for f in ldict.keys()
2296 if (type(ldict[f]) in (types.FunctionType, types.MethodType) and ldict[f].__name__[:2] == 'p_'
2297 and ldict[f].__name__ != 'p_error')]
2299 # Check for non-empty symbols
2300 if len(symbols) == 0:
2301 raise YaccError,"no rules of the form p_rulename are defined."
2303 # Sort the symbols by line number
# Source order of p_* functions fixes rule numbering (Python 2 cmp-sort).
2304 symbols.sort(lambda x,y: cmp(x.func_code.co_firstlineno,y.func_code.co_firstlineno))
2306 # Add all of the symbols to the grammar
2308 if (add_function(f)) < 0:
2311 files[f.func_code.co_filename] = None
2313 # Make a signature of the docstrings
2316 Signature.update(f.__doc__)
2321 raise YaccError,"Unable to construct parser."
# Only rebuild tables (and re-validate sources) when the cached ones don't
# match the current grammar signature.
2323 if not lr_read_tables(tabmodule):
2326 for filename in files.keys():
2327 if not validate_file(filename):
2330 # Validate dictionary
2331 validate_dict(ldict)
2333 if start and not Prodnames.has_key(start):
2334 raise YaccError,"Bad starting symbol '%s'" % start
2336 augment_grammar(start)
2337 error = verify_productions(cycle_check=check_recursion)
2338 otherfunc = [ldict[f] for f in ldict.keys()
2339 if (type(f) in (types.FunctionType,types.MethodType) and ldict[f].__name__[:2] != 'p_')]
2342 raise YaccError,"Unable to construct parser."
2346 compute_follow(start)
# Dispatch to slr_parse_table() or lalr_parse_table() by `method` —
# the SLR branch header is in a missing line; TODO confirm.
2350 elif method == 'LALR':
2353 raise YaccError, "Unknown parsing method '%s'" % method
2356 lr_write_tables(tabmodule,outputdir)
# Dump the accumulated debug text (_vfc first, then _vf) to parser.out.
2360 f = open(os.path.join(outputdir,debugfile),"w")
2361 f.write(_vfc.getvalue())
2363 f.write(_vf.getvalue())
2366 print "yacc: can't create '%s'" % debugfile,e
2368 # Made it here. Create a parser object and set up its internal state.
2369 # Set global parse() method to bound method of parser object.
2372 p.productions = Productions
2373 p.errorfunc = Errorfunc
2374 p.action = _lr_action
2376 p.method = _lr_method
2377 p.require = Requires
2382 # Clean up all of the globals we created
2387 # yacc_cleanup function. Delete all of the global variables
2388 # used during table construction
# NOTE(review): the `def` line of this cleanup function falls in a sampling
# gap — the global/del statements below are its body. It frees the
# table-construction globals after the parser object has been built.
2391 global _lr_action, _lr_goto, _lr_method, _lr_goto_cache
2392 del _lr_action, _lr_goto, _lr_method, _lr_goto_cache
2394 global Productions, Prodnames, Prodmap, Terminals
2395 global Nonterminals, First, Follow, Precedence, LRitems
2396 global Errorfunc, Signature, Requires
2397 global Prodempty, TReductions, NTReductions, GotoSetNum, Canonical
2399 del Productions, Prodnames, Prodmap, Terminals
2400 del Nonterminals, First, Follow, Precedence, LRitems
2401 del Errorfunc, Signature, Requires
2402 del Prodempty, TReductions, NTReductions, GotoSetNum, Canonical
2408 # Stub that raises an error if parsing is attempted without first calling yacc()
# Module-level placeholder; yacc() replaces it with the parser's bound parse
# method (per the comment at line 2369 above).
2409 def parse(*args,**kwargs):
2410 raise YaccError, "yacc: No parser built with yacc()"