tools/yacc.py

   1 # -----------------------------------------------------------------------------
   2 # ply: yacc.py
   3 #
   4 # Copyright (C) 2001-2009,
   5 # David M. Beazley (Dabeaz LLC)
   6 # All rights reserved.
   7 #
   8 # Redistribution and use in source and binary forms, with or without
   9 # modification, are permitted provided that the following conditions are
  10 # met:
  11 #
  12 # * Redistributions of source code must retain the above copyright notice,
  13 #   this list of conditions and the following disclaimer.
  14 # * Redistributions in binary form must reproduce the above copyright notice,
  15 #   this list of conditions and the following disclaimer in the documentation
  16 #   and/or other materials provided with the distribution.
  17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to
  18 #   endorse or promote products derived from this software without
  19 #  specific prior written permission.
  20 #
  21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32 # -----------------------------------------------------------------------------
  33 #
  34 # This implements an LR parser that is constructed from grammar rules defined
  35 # as Python functions. The grammar is specified by supplying the BNF inside
  36 # Python documentation strings.  The inspiration for this technique was borrowed
  37 # from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
  38 # Spark and the GNU bison utility.
  39 #
  40 # The current implementation is only somewhat object-oriented. The
  41 # LR parser itself is defined in terms of an object (which allows multiple
  42 # parsers to co-exist).  However, most of the variables used during table
  43 # construction are defined in terms of global variables.  Users shouldn't
  44 # notice unless they are trying to define multiple parsers at the same
  45 # time using threads (in which case they should have their head examined).
  46 #
  47 # This implementation supports both SLR and LALR(1) parsing.  LALR(1)
  48 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
  49 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
  50 # Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
  51 # by the more efficient DeRemer and Pennello algorithm.
  52 #
  53 # :::::::: WARNING :::::::
  54 #
  55 # Construction of LR parsing tables is fairly complicated and expensive.
  56 # To make this module run fast, a *LOT* of work has been put into
  57 # optimization---often at the expense of readability and what one might
  58 # consider to be good Python "coding style."   Modify the code at your
  59 # own risk!
  60 # ----------------------------------------------------------------------------
  61
  62 __version__    = "3.3"
  63 __tabversion__ = "3.2"       # Table version
  64
  65 #-----------------------------------------------------------------------------
  66 #                     === User configurable parameters ===
  67 #
  68 # Change these to modify the default behavior of yacc (if you wish)
  69 #-----------------------------------------------------------------------------
  70
  71 yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
  72                                # a 'parser.out' file in the current directory
  73
  74 debug_file  = 'parser.out'     # Default name of the debugging file
  75 tab_module  = 'parsetab'       # Default name of the table module
  76 default_lr  = 'LALR'           # Default LR table generation method
  77
  78 error_count = 3                # Number of symbols that must be shifted to leave recovery mode
  79
  80 yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
  81                                # implementations of certain functions.
  82
  83 resultlimit = 40               # Size limit of results when running in debug mode.
  84
  85 pickle_protocol = 0            # Protocol to use when writing pickle files
  86
  87 import re, types, sys, os.path
  88
  89 # Compatibility function for python 2.6/3.0
  90 if sys.version_info[0] < 3:
  91     def func_code(f):
  92         return f.func_code
  93 else:
  94     def func_code(f):
  95         return f.__code__
  96
  97 # Compatibility
  98 try:
  99     MAXINT = sys.maxint
 100 except AttributeError:
 101     MAXINT = sys.maxsize
 102
 103 # Python 2.x/3.0 compatibility.
 104 def load_ply_lex():
 105     if sys.version_info[0] < 3:
 106         import lex
 107     else:
 108         import ply.lex as lex
 109     return lex
 110
 111 # This object is a stand-in for a logging object created by the
 112 # logging module.   PLY will use this by default to create things
 113 # such as the parser.out file.  If a user wants more detailed
 114 # information, they can create their own logging object and pass
 115 # it into PLY.
 116
 117 class PlyLogger(object):
 118     def __init__(self,f):
 119         self.f = f
 120     def debug(self,msg,*args,**kwargs):
 121         self.f.write((msg % args) + "\n")
 122     info     = debug
 123
 124     def warning(self,msg,*args,**kwargs):
 125         self.f.write("WARNING: "+ (msg % args) + "\n")
 126
 127     def error(self,msg,*args,**kwargs):
 128         self.f.write("ERROR: " + (msg % args) + "\n")
 129
 130     critical = debug
 131
 132 # Null logger is used when no output is generated. Does nothing.
 133 class NullLogger(object):
 134     def __getattribute__(self,name):
 135         return self
 136     def __call__(self,*args,**kwargs):
 137         return self
 138
 139 # Exception raised for yacc-related errors
 140 class YaccError(Exception):   pass
 141
 142 # Format the result message that the parser produces when running in debug mode.
 143 def format_result(r):
 144     repr_str = repr(r)
 145     if '\n' in repr_str: repr_str = repr(repr_str)
 146     if len(repr_str) > resultlimit:
 147         repr_str = repr_str[:resultlimit]+" ..."
 148     result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
 149     return result
 150
 151
 152 # Format stack entries when the parser is running in debug mode
 153 def format_stack_entry(r):
 154     repr_str = repr(r)
 155     if '\n' in repr_str: repr_str = repr(repr_str)
 156     if len(repr_str) < 16:
 157         return repr_str
 158     else:
 159         return "<%s @ 0x%x>" % (type(r).__name__,id(r))
 160
 161 #-----------------------------------------------------------------------------
 162 #                        ===  LR Parsing Engine ===
 163 #
 164 # The following classes are used for the LR parser itself.  These are not
 165 # used during table construction and are independent of the actual LR
 166 # table generation algorithm
 167 #-----------------------------------------------------------------------------
 168
 169 # This class is used to hold non-terminal grammar symbols during parsing.
 170 # It normally has the following attributes set:
 171 #        .type       = Grammar symbol type
 172 #        .value      = Symbol value
 173 #        .lineno     = Starting line number
 174 #        .endlineno  = Ending line number (optional, set automatically)
 175 #        .lexpos     = Starting lex position
 176 #        .endlexpos  = Ending lex position (optional, set automatically)
 177
 178 class YaccSymbol:
 179     def __str__(self):    return self.type
 180     def __repr__(self):   return str(self)
 181
 182 # This class is a wrapper around the objects actually passed to each
 183 # grammar rule.   Index lookup and assignment actually assign the
 184 # .value attribute of the underlying YaccSymbol object.
 185 # The lineno() method returns the line number of a given
 186 # item (or 0 if not defined).   The linespan() method returns
 187 # a tuple of (startline,endline) representing the range of lines
 188 # for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
 189 # representing the range of positional information for a symbol.
 190
 191 class YaccProduction:
 192     def __init__(self,s,stack=None):
 193         self.slice = s
 194         self.stack = stack
 195         self.lexer = None
 196         self.parser= None
 197     def __getitem__(self,n):
 198         if n >= 0: return self.slice[n].value
 199         else: return self.stack[n].value
 200
 201     def __setitem__(self,n,v):
 202         self.slice[n].value = v
 203
 204     def __getslice__(self,i,j):
 205         return [s.value for s in self.slice[i:j]]
 206
 207     def __len__(self):
 208         return len(self.slice)
 209
 210     def lineno(self,n):
 211         return getattr(self.slice[n],"lineno",0)
 212
 213     def set_lineno(self,n,lineno):
 214         self.slice[n].lineno = lineno
 215
 216     def linespan(self,n):
 217         startline = getattr(self.slice[n],"lineno",0)
 218         endline = getattr(self.slice[n],"endlineno",startline)
 219         return startline,endline
 220
 221     def lexpos(self,n):
 222         return getattr(self.slice[n],"lexpos",0)
 223
 224     def lexspan(self,n):
 225         startpos = getattr(self.slice[n],"lexpos",0)
 226         endpos = getattr(self.slice[n],"endlexpos",startpos)
 227         return startpos,endpos
 228
 229     def error(self):
 230        raise SyntaxError
 231
 232
 233 # -----------------------------------------------------------------------------
 234 #                               == LRParser ==
 235 #
 236 # The LR Parsing engine.
 237 # -----------------------------------------------------------------------------
 238
 239 class LRParser:
 240     def __init__(self,lrtab,errorf):
 241         self.productions = lrtab.lr_productions
 242         self.action      = lrtab.lr_action
 243         self.goto        = lrtab.lr_goto
 244         self.errorfunc   = errorf
 245
 246     def errok(self):
 247         self.errorok     = 1
 248
 249     def restart(self):
 250         del self.statestack[:]
 251         del self.symstack[:]
 252         sym = YaccSymbol()
 253         sym.type = '$end'
 254         self.symstack.append(sym)
 255         self.statestack.append(0)
 256
 257     def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
 258         if debug or yaccdevel:
 259             if isinstance(debug,int):
 260                 debug = PlyLogger(sys.stderr)
 261             return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
 262         elif tracking:
 263             return self.parseopt(input,lexer,debug,tracking,tokenfunc)
 264         else:
 265             return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
 266
 267
 268     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 269     # parsedebug().
 270     #
 271     # This is the debugging enabled version of parse().  All changes made to the
 272     # parsing engine should be made here.   For the non-debugging version,
 273     # copy this code to a method parseopt() and delete all of the sections
 274     # enclosed in:
 275     #
 276     #      #--! DEBUG
 277     #      statements
 278     #      #--! DEBUG
 279     #
 280     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 281
 282     def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
 283         lookahead = None                 # Current lookahead symbol
 284         lookaheadstack = [ ]             # Stack of lookahead symbols
 285         actions = self.action            # Local reference to action table (to avoid lookup on self.)
 286         goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
 287         prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
 288         pslice  = YaccProduction(None)   # Production object passed to grammar rules
 289         errorcount = 0                   # Used during error recovery
 290
 291         # --! DEBUG
 292         debug.info("PLY: PARSE DEBUG START")
 293         # --! DEBUG
 294
 295         # If no lexer was given, we will try to use the lex module
 296         if not lexer:
 297             lex = load_ply_lex()
 298             lexer = lex.lexer
 299
 300         # Set up the lexer and parser objects on pslice
 301         pslice.lexer = lexer
 302         pslice.parser = self
 303
 304         # If input was supplied, pass to lexer
 305         if input is not None:
 306             lexer.input(input)
 307
 308         if tokenfunc is None:
 309            # Tokenize function
 310            get_token = lexer.token
 311         else:
 312            get_token = tokenfunc
 313
 314         # Set up the state and symbol stacks
 315
 316         statestack = [ ]                # Stack of parsing states
 317         self.statestack = statestack
 318         symstack   = [ ]                # Stack of grammar symbols
 319         self.symstack = symstack
 320
 321         pslice.stack = symstack         # Put in the production
 322         errtoken   = None               # Err token
 323
 324         # The start state is assumed to be (0,$end)
 325
 326         statestack.append(0)
 327         sym = YaccSymbol()
 328         sym.type = "$end"
 329         symstack.append(sym)
 330         state = 0
 331         while 1:
 332             # Get the next symbol on the input.  If a lookahead symbol
 333             # is already set, we just use that. Otherwise, we'll pull
 334             # the next token off of the lookaheadstack or from the lexer
 335
 336             # --! DEBUG
 337             debug.debug('')
 338             debug.debug('State  : %s', state)
 339             # --! DEBUG
 340
 341             if not lookahead:
 342                 if not lookaheadstack:
 343                     lookahead = get_token()     # Get the next token
 344                 else:
 345                     lookahead = lookaheadstack.pop()
 346                 if not lookahead:
 347                     lookahead = YaccSymbol()
 348                     lookahead.type = "$end"
 349
 350             # --! DEBUG
 351             debug.debug('Stack  : %s',
 352                         ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
 353             # --! DEBUG
 354
 355             # Check the action table
 356             ltype = lookahead.type
 357             t = actions[state].get(ltype)
 358
 359             if t is not None:
 360                 if t > 0:
 361                     # shift a symbol on the stack
 362                     statestack.append(t)
 363                     state = t
 364
 365                     # --! DEBUG
 366                     debug.debug("Action : Shift and goto state %s", t)
 367                     # --! DEBUG
 368
 369                     symstack.append(lookahead)
 370                     lookahead = None
 371
 372                     # Decrease error count on successful shift
 373                     if errorcount: errorcount -=1
 374                     continue
 375
 376                 if t < 0:
 377                     # reduce a symbol on the stack, emit a production
 378                     p = prod[-t]
 379                     pname = p.name
 380                     plen  = p.len
 381
 382                     # Get production function
 383                     sym = YaccSymbol()
 384                     sym.type = pname       # Production name
 385                     sym.value = None
 386
 387                     # --! DEBUG
 388                     if plen:
 389                         debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
 390                     else:
 391                         debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
 392
 393                     # --! DEBUG
 394
 395                     if plen:
 396                         targ = symstack[-plen-1:]
 397                         targ[0] = sym
 398
 399                         # --! TRACKING
 400                         if tracking:
 401                            t1 = targ[1]
 402                            sym.lineno = t1.lineno
 403                            sym.lexpos = t1.lexpos
 404                            t1 = targ[-1]
 405                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
 406                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
 407
 408                         # --! TRACKING
 409
 410                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 411                         # The code enclosed in this section is duplicated
 412                         # below as a performance optimization.  Make sure
 413                         # changes get made in both locations.
 414
 415                         pslice.slice = targ
 416
 417                         try:
 418                             # Call the grammar rule with our special slice object
 419                             del symstack[-plen:]
 420                             del statestack[-plen:]
 421                             p.callable(pslice)
 422                             # --! DEBUG
 423                             debug.info("Result : %s", format_result(pslice[0]))
 424                             # --! DEBUG
 425                             symstack.append(sym)
 426                             state = goto[statestack[-1]][pname]
 427                             statestack.append(state)
 428                         except SyntaxError:
 429                             # If an error was set. Enter error recovery state
 430                             lookaheadstack.append(lookahead)
 431                             symstack.pop()
 432                             statestack.pop()
 433                             state = statestack[-1]
 434                             sym.type = 'error'
 435                             lookahead = sym
 436                             errorcount = error_count
 437                             self.errorok = 0
 438                         continue
 439                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 440
 441                     else:
 442
 443                         # --! TRACKING
 444                         if tracking:
 445                            sym.lineno = lexer.lineno
 446                            sym.lexpos = lexer.lexpos
 447                         # --! TRACKING
 448
 449                         targ = [ sym ]
 450
 451                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 452                         # The code enclosed in this section is duplicated
 453                         # above as a performance optimization.  Make sure
 454                         # changes get made in both locations.
 455
 456                         pslice.slice = targ
 457
 458                         try:
 459                             # Call the grammar rule with our special slice object
 460                             p.callable(pslice)
 461                             # --! DEBUG
 462                             debug.info("Result : %s", format_result(pslice[0]))
 463                             # --! DEBUG
 464                             symstack.append(sym)
 465                             state = goto[statestack[-1]][pname]
 466                             statestack.append(state)
 467                         except SyntaxError:
 468                             # If an error was set. Enter error recovery state
 469                             lookaheadstack.append(lookahead)
 470                             symstack.pop()
 471                             statestack.pop()
 472                             state = statestack[-1]
 473                             sym.type = 'error'
 474                             lookahead = sym
 475                             errorcount = error_count
 476                             self.errorok = 0
 477                         continue
 478                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 479
 480                 if t == 0:
 481                     n = symstack[-1]
 482                     result = getattr(n,"value",None)
 483                     # --! DEBUG
 484                     debug.info("Done   : Returning %s", format_result(result))
 485                     debug.info("PLY: PARSE DEBUG END")
 486                     # --! DEBUG
 487                     return result
 488
 489             if t == None:
 490
 491                 # --! DEBUG
 492                 debug.error('Error  : %s',
 493                             ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
 494                 # --! DEBUG
 495
 496                 # We have some kind of parsing error here.  To handle
 497                 # this, we are going to push the current token onto
 498                 # the tokenstack and replace it with an 'error' token.
 499                 # If there are any synchronization rules, they may
 500                 # catch it.
 501                 #
 502                 # In addition to pushing the error token, we call call
 503                 # the user defined p_error() function if this is the
 504                 # first syntax error.  This function is only called if
 505                 # errorcount == 0.
 506                 if errorcount == 0 or self.errorok:
 507                     errorcount = error_count
 508                     self.errorok = 0
 509                     errtoken = lookahead
 510                     if errtoken.type == "$end":
 511                         errtoken = None               # End of file!
 512                     if self.errorfunc:
 513                         global errok,token,restart
 514                         errok = self.errok        # Set some special functions available in error recovery
 515                         token = get_token
 516                         restart = self.restart
 517                         if errtoken and not hasattr(errtoken,'lexer'):
 518                             errtoken.lexer = lexer
 519                         tok = self.errorfunc(errtoken)
 520                         del errok, token, restart   # Delete special functions
 521
 522                         if self.errorok:
 523                             # User must have done some kind of panic
 524                             # mode recovery on their own.  The
 525                             # returned token is the next lookahead
 526                             lookahead = tok
 527                             errtoken = None
 528                             continue
 529                     else:
 530                         if errtoken:
 531                             if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
 532                             else: lineno = 0
 533                             if lineno:
 534                                 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
 535                             else:
 536                                 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
 537                         else:
 538                             sys.stderr.write("yacc: Parse error in input. EOF\n")
 539                             return
 540
 541                 else:
 542                     errorcount = error_count
 543
 544                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
 545                 # entire parse has been rolled back and we're completely hosed.   The token is
 546                 # discarded and we just keep going.
 547
 548                 if len(statestack) <= 1 and lookahead.type != "$end":
 549                     lookahead = None
 550                     errtoken = None
 551                     state = 0
 552                     # Nuke the pushback stack
 553                     del lookaheadstack[:]
 554                     continue
 555
 556                 # case 2: the statestack has a couple of entries on it, but we're
 557                 # at the end of the file. nuke the top entry and generate an error token
 558
 559                 # Start nuking entries on the stack
 560                 if lookahead.type == "$end":
 561                     # Whoa. We're really hosed here. Bail out
 562                     return
 563
 564                 if lookahead.type != 'error':
 565                     sym = symstack[-1]
 566                     if sym.type == 'error':
 567                         # Hmmm. Error is on top of stack, we'll just nuke input
 568                         # symbol and continue
 569                         lookahead = None
 570                         continue
 571                     t = YaccSymbol()
 572                     t.type = 'error'
 573                     if hasattr(lookahead,"lineno"):
 574                         t.lineno = lookahead.lineno
 575                     t.value = lookahead
 576                     lookaheadstack.append(lookahead)
 577                     lookahead = t
 578                 else:
 579                     symstack.pop()
 580                     statestack.pop()
 581                     state = statestack[-1]       # Potential bug fix
 582
 583                 continue
 584
 585             # Call an error function here
 586             raise RuntimeError("yacc: internal parser error!!!\n")
 587
 588     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 589     # parseopt().
 590     #
 591     # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
 592     # Edit the debug version above, then copy any modifications to the method
 593     # below while removing #--! DEBUG sections.
 594     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 595
 596
 597     def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
 598         lookahead = None                 # Current lookahead symbol
 599         lookaheadstack = [ ]             # Stack of lookahead symbols
 600         actions = self.action            # Local reference to action table (to avoid lookup on self.)
 601         goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
 602         prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
 603         pslice  = YaccProduction(None)   # Production object passed to grammar rules
 604         errorcount = 0                   # Used during error recovery
 605
 606         # If no lexer was given, we will try to use the lex module
 607         if not lexer:
 608             lex = load_ply_lex()
 609             lexer = lex.lexer
 610
 611         # Set up the lexer and parser objects on pslice
 612         pslice.lexer = lexer
 613         pslice.parser = self
 614
 615         # If input was supplied, pass to lexer
 616         if input is not None:
 617             lexer.input(input)
 618
 619         if tokenfunc is None:
 620            # Tokenize function
 621            get_token = lexer.token
 622         else:
 623            get_token = tokenfunc
 624
 625         # Set up the state and symbol stacks
 626
 627         statestack = [ ]                # Stack of parsing states
 628         self.statestack = statestack
 629         symstack   = [ ]                # Stack of grammar symbols
 630         self.symstack = symstack
 631
 632         pslice.stack = symstack         # Put in the production
 633         errtoken   = None               # Err token
 634
 635         # The start state is assumed to be (0,$end)
 636
 637         statestack.append(0)
 638         sym = YaccSymbol()
 639         sym.type = '$end'
 640         symstack.append(sym)
 641         state = 0
 642         while 1:
 643             # Get the next symbol on the input.  If a lookahead symbol
 644             # is already set, we just use that. Otherwise, we'll pull
 645             # the next token off of the lookaheadstack or from the lexer
 646
 647             if not lookahead:
 648                 if not lookaheadstack:
 649                     lookahead = get_token()     # Get the next token
 650                 else:
 651                     lookahead = lookaheadstack.pop()
 652                 if not lookahead:
 653                     lookahead = YaccSymbol()
 654                     lookahead.type = '$end'
 655
 656             # Check the action table
 657             ltype = lookahead.type
 658             t = actions[state].get(ltype)
 659
 660             if t is not None:
 661                 if t > 0:
 662                     # shift a symbol on the stack
 663                     statestack.append(t)
 664                     state = t
 665
 666                     symstack.append(lookahead)
 667                     lookahead = None
 668
 669                     # Decrease error count on successful shift
 670                     if errorcount: errorcount -=1
 671                     continue
 672
 673                 if t < 0:
 674                     # reduce a symbol on the stack, emit a production
 675                     p = prod[-t]
 676                     pname = p.name
 677                     plen  = p.len
 678
 679                     # Get production function
 680                     sym = YaccSymbol()
 681                     sym.type = pname       # Production name
 682                     sym.value = None
 683
 684                     if plen:
 685                         targ = symstack[-plen-1:]
 686                         targ[0] = sym
 687
 688                         # --! TRACKING
 689                         if tracking:
 690                            t1 = targ[1]
 691                            sym.lineno = t1.lineno
 692                            sym.lexpos = t1.lexpos
 693                            t1 = targ[-1]
 694                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
 695                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
 696
 697                         # --! TRACKING
 698
 699                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 700                         # The code enclosed in this section is duplicated
 701                         # below as a performance optimization.  Make sure
 702                         # changes get made in both locations.
 703
 704                         pslice.slice = targ
 705
 706                         try:
 707                             # Call the grammar rule with our special slice object
 708                             del symstack[-plen:]
 709                             del statestack[-plen:]
 710                             p.callable(pslice)
 711                             symstack.append(sym)
 712                             state = goto[statestack[-1]][pname]
 713                             statestack.append(state)
 714                         except SyntaxError:
 715                             # If an error was set. Enter error recovery state
 716                             lookaheadstack.append(lookahead)
 717                             symstack.pop()
 718                             statestack.pop()
 719                             state = statestack[-1]
 720                             sym.type = 'error'
 721                             lookahead = sym
 722                             errorcount = error_count
 723                             self.errorok = 0
 724                         continue
 725                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 726
 727                     else:
 728
 729                         # --! TRACKING
 730                         if tracking:
 731                            sym.lineno = lexer.lineno
 732                            sym.lexpos = lexer.lexpos
 733                         # --! TRACKING
 734
 735                         targ = [ sym ]
 736
 737                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 738                         # The code enclosed in this section is duplicated
 739                         # above as a performance optimization.  Make sure
 740                         # changes get made in both locations.
 741
 742                         pslice.slice = targ
 743
 744                         try:
 745                             # Call the grammar rule with our special slice object
 746                             p.callable(pslice)
 747                             symstack.append(sym)
 748                             state = goto[statestack[-1]][pname]
 749                             statestack.append(state)
 750                         except SyntaxError:
 751                             # If an error was set. Enter error recovery state
 752                             lookaheadstack.append(lookahead)
 753                             symstack.pop()
 754                             statestack.pop()
 755                             state = statestack[-1]
 756                             sym.type = 'error'
 757                             lookahead = sym
 758                             errorcount = error_count
 759                             self.errorok = 0
 760                         continue
 761                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 762
 763                 if t == 0:
 764                     n = symstack[-1]
 765                     return getattr(n,"value",None)
 766
 767             if t == None:
 768
 769                 # We have some kind of parsing error here.  To handle
 770                 # this, we are going to push the current token onto
 771                 # the tokenstack and replace it with an 'error' token.
 772                 # If there are any synchronization rules, they may
 773                 # catch it.
 774                 #
                # In addition to pushing the error token, we call
 776                 # the user defined p_error() function if this is the
 777                 # first syntax error.  This function is only called if
 778                 # errorcount == 0.
 779                 if errorcount == 0 or self.errorok:
 780                     errorcount = error_count
 781                     self.errorok = 0
 782                     errtoken = lookahead
 783                     if errtoken.type == '$end':
 784                         errtoken = None               # End of file!
 785                     if self.errorfunc:
 786                         global errok,token,restart
 787                         errok = self.errok        # Set some special functions available in error recovery
 788                         token = get_token
 789                         restart = self.restart
 790                         if errtoken and not hasattr(errtoken,'lexer'):
 791                             errtoken.lexer = lexer
 792                         tok = self.errorfunc(errtoken)
 793                         del errok, token, restart   # Delete special functions
 794
 795                         if self.errorok:
 796                             # User must have done some kind of panic
 797                             # mode recovery on their own.  The
 798                             # returned token is the next lookahead
 799                             lookahead = tok
 800                             errtoken = None
 801                             continue
 802                     else:
 803                         if errtoken:
 804                             if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
 805                             else: lineno = 0
 806                             if lineno:
 807                                 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
 808                             else:
 809                                 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
 810                         else:
 811                             sys.stderr.write("yacc: Parse error in input. EOF\n")
 812                             return
 813
 814                 else:
 815                     errorcount = error_count
 816
 817                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
 818                 # entire parse has been rolled back and we're completely hosed.   The token is
 819                 # discarded and we just keep going.
 820
 821                 if len(statestack) <= 1 and lookahead.type != '$end':
 822                     lookahead = None
 823                     errtoken = None
 824                     state = 0
 825                     # Nuke the pushback stack
 826                     del lookaheadstack[:]
 827                     continue
 828
 829                 # case 2: the statestack has a couple of entries on it, but we're
 830                 # at the end of the file. nuke the top entry and generate an error token
 831
 832                 # Start nuking entries on the stack
 833                 if lookahead.type == '$end':
 834                     # Whoa. We're really hosed here. Bail out
 835                     return
 836
 837                 if lookahead.type != 'error':
 838                     sym = symstack[-1]
 839                     if sym.type == 'error':
 840                         # Hmmm. Error is on top of stack, we'll just nuke input
 841                         # symbol and continue
 842                         lookahead = None
 843                         continue
 844                     t = YaccSymbol()
 845                     t.type = 'error'
 846                     if hasattr(lookahead,"lineno"):
 847                         t.lineno = lookahead.lineno
 848                     t.value = lookahead
 849                     lookaheadstack.append(lookahead)
 850                     lookahead = t
 851                 else:
 852                     symstack.pop()
 853                     statestack.pop()
 854                     state = statestack[-1]       # Potential bug fix
 855
 856                 continue
 857
 858             # Call an error function here
 859             raise RuntimeError("yacc: internal parser error!!!\n")
 860
 861     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 862     # parseopt_notrack().
 863     #
 864     # Optimized version of parseopt() with line number tracking removed.
 865     # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
 866     # code in the #--! TRACKING sections
 867     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 868
    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Run the table-driven LR parsing loop without position tracking.

        Parameters:
            input     - If not None, handed to lexer.input() before parsing.
            lexer     - Object tokens are pulled from.  If false, the 'lexer'
                        attribute of the module returned by load_ply_lex()
                        is used instead.
            debug     - Accepted but not referenced in this method.
            tracking  - Accepted but not referenced in this method.
            tokenfunc - Optional callable (invoked with no arguments) that
                        returns the next token.  Defaults to lexer.token.

        Returns:
            The 'value' attribute of the symbol on top of the symbol stack
            (None if it has no such attribute) once an action table entry
            of 0 is reached.  A bare return (None) happens when error
            recovery gives up at end of input.

        Raises:
            RuntimeError if an action value falls through every branch of
            the loop.  Only SyntaxError raised by a grammar rule function is
            caught here; any other exception from it propagates.

        Side effects:
            Rebinds self.statestack and self.symstack to fresh lists, resets
            self.errorok to 0 when an error is handled, and temporarily
            defines the module globals errok, token and restart while
            self.errorfunc is running.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
           # Tokenize function
           get_token = lexer.token
        else:
           get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    # No token available: synthesize the end-of-input marker
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            # (a positive entry is a shift, a negative entry is a reduce,
            #  0 is accept, and a missing entry is a syntax error)
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # Slice handed to the rule: the new result symbol in
                        # slot 0 followed by the plen symbols being reduced.
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            # NOTE(review): the reduced symbols and states were
                            # already deleted from both stacks above, so the
                            # pops below remove one further entry each --
                            # confirm this is the intended recovery point.
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # Empty production: the slice holds only the result symbol
                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: hand back the value of the symbol on top of the stack
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0 (or if self.errorok has been set).
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        # Publish helper functions as module globals for the
                        # duration of the error function call only.
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No error function installed: report on stderr
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    # Already recovering from an error: just reset the counter
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Replace the offending token with an 'error' symbol that
                    # wraps it, and push the token back for later re-reading.
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # The 'error' symbol itself was not accepted in this
                    # state: discard the top of both stacks and retry.
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
1115
1116 # -----------------------------------------------------------------------------
1117 #                          === Grammar Representation ===
1118 #
1119 # The following functions, classes, and variables are used to represent and
1120 # manipulate the rules that make up a grammar.
1121 # -----------------------------------------------------------------------------
1122
1123 import re
1124
# Compiled regex matching a non-empty string made up solely of ASCII letters,
# digits, underscores and hyphens.  add_production() uses it to validate rule
# names and the symbols appearing in a rule.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1127
1128 # -----------------------------------------------------------------------------
1129 # class Production:
1130 #
1131 # This class stores the raw information about a single production or grammar rule.
1132 # A grammar rule refers to a specification such as this:
1133 #
1134 #       expr : expr PLUS term
1135 #
1136 # Here are the basic attributes defined on all productions
1137 #
1138 #       name     - Name of the production.  For example 'expr'
1139 #       prod     - A list of symbols on the right side ['expr','PLUS','term']
1140 #       prec     - Production precedence level
1141 #       number   - Production number.
1142 #       func     - Function that executes on reduce
1143 #       file     - File where production function is defined
#       line     - Line number where production function is defined
1145 #
1146 # The following attributes are defined or optional.
1147 #
1148 #       len       - Length of the production (number of symbols on right hand side)
1149 #       usyms     - Set of unique symbols found in the production
1150 # -----------------------------------------------------------------------------
1151
class Production(object):
    """Raw information about a single grammar rule, e.g. ``expr : expr PLUS term``.

    Attributes set by the constructor:
        name     - Name of the production, e.g. 'expr'
        prod     - Tuple of the right-hand-side symbols
        number   - Production number
        func     - Name of the function executed on reduce (looked up by bind())
        callable - The bound function, or None until bind() finds one
        file     - File where the production function is defined
        line     - Line number where the production function is defined
        prec     - Precedence tuple, ('right', 0) by default
        len      - Number of symbols on the right-hand side
        usyms    - List of the distinct right-hand-side symbols, in order of
                   first appearance
        lr_items - List of LR items for the production (starts empty)
        lr_next  - Starts as None
        str      - Human-readable form, e.g. 'expr -> expr PLUS term'

    A Production is always truthy, even when its right-hand side is empty.
    """

    # Class-level default; this class never modifies it.
    reduced = 0

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name, " ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production(" + str(self) + ")"

    def __len__(self):
        return len(self.prod)

    def __bool__(self):
        # Without this hook Python 3 falls back to __len__, which would make
        # a production with an empty right-hand side evaluate as false.
        return True

    # Python 2 looks up the truth-value hook under this name.
    __nonzero__ = __bool__

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        """Build the LRItem with the dot at position n.

        Returns None when n is greater than the number of right-hand-side
        symbols.  The returned item additionally carries lr_after and
        lr_before attributes.
        """
        if n > len(self.prod):
            return None
        p = LRItem(self, n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        # NOTE(review): Prodnames is not defined in this class; it must be
        # resolvable as a module-level name when this method runs -- confirm.
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        # NOTE(review): for n == 0 the index n-1 is -1, which selects the last
        # entry of p.prod rather than raising IndexError -- confirm intended.
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        """Look up self.func in pdict and store the result in self.callable.

        Does nothing when self.func is false.  A missing key raises KeyError.
        """
        if self.func:
            self.callable = pdict[self.func]
1220
1221 # This class serves as a minimal standin for Production objects when
1222 # reading table data from files.   It only contains information
1223 # actually used by the LR parsing engine, plus some additional
1224 # debugging information.
class MiniProduction(object):
    """Cut-down record holding the production fields passed to its constructor.

    Stores a display string, the rule name, the right-hand-side length and
    the name/location of the rule function, plus a 'callable' slot that
    bind() fills in.
    """

    def __init__(self,str,name,len,func,file,line):
        # The parameter names shadow builtins but belong to the signature.
        self.str      = str
        self.name     = name
        self.len      = len
        self.file     = file
        self.line     = line
        self.func     = func
        self.callable = None      # filled in later by bind()

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    def __str__(self):
        return self.str

    def bind(self,pdict):
        """Resolve self.func through pdict into self.callable (no-op if func is false)."""
        if not self.func:
            return
        self.callable = pdict[self.func]
1243
1244
1245 # -----------------------------------------------------------------------------
1246 # class LRItem
1247 #
1248 # This class represents a specific stage of parsing a production rule.  For
1249 # example:
1250 #
1251 #       expr : expr . PLUS term
1252 #
# In the above, the "." represents the current location of the parse.  Here
# are the basic attributes:
1255 #
1256 #       name       - Name of the production.  For example 'expr'
1257 #       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
1258 #       number     - Production number.
1259 #
1260 #       lr_next      Next LR item. Example, if we are ' expr -> expr . PLUS term'
1261 #                    then lr_next refers to 'expr -> expr PLUS . term'
1262 #       lr_index   - LR item index (location of the ".") in the prod list.
1263 #       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the prod tuple (right hand side symbols plus the ".")
1265 #       lr_after    - List of all productions that immediately follow
1266 #       lr_before   - Grammar symbol immediately before
1267 # -----------------------------------------------------------------------------
1268
class LRItem(object):
    """A production with a "." marker inserted at position n of its symbols.

    Copies name, number and usyms from the production p; 'prod' is the
    symbol tuple including the ".", 'len' is the length of that tuple,
    'lr_index' is n and 'lookaheads' starts as an empty dictionary.
    """

    def __init__(self,p,n):
        self.name = p.name

        # Build the dotted right-hand side before freezing it into a tuple
        dotted = list(p.prod)
        dotted.insert(n,".")

        self.number     = p.number
        self.lr_index   = n
        self.lookaheads = { }
        self.prod       = tuple(dotted)
        self.len        = len(self.prod)
        self.usyms      = p.usyms

    def __str__(self):
        if not self.prod:
            return "%s -> <empty>" % self.name
        return "%s -> %s" % (self.name," ".join(self.prod))

    def __repr__(self):
        text = str(self)
        return "LRItem(" + text + ")"
1290
1291 # -----------------------------------------------------------------------------
1292 # rightmost_terminal()
1293 #
1294 # Return the rightmost terminal from a list of symbols.  Used in add_production()
1295 # -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    """Return the last entry of symbols that is contained in terminals.

    The sequence is scanned from its end towards its start; None is
    returned when no entry is found in terminals.
    """
    for candidate in reversed(symbols):
        if candidate in terminals:
            return candidate
    return None
1303
1304 # -----------------------------------------------------------------------------
1305 #                           === GRAMMAR CLASS ===
1306 #
1307 # The following class represents the contents of the specified grammar along
1308 # with various computed properties such as first sets, follow sets, LR items, etc.
1309 # This data is used for critical parts of the table generation process later.
1310 # -----------------------------------------------------------------------------
1311
class GrammarError(YaccError):
    """Error raised by Grammar methods when a grammar specification is invalid."""
1313
1314 class Grammar(object):
1315     def __init__(self,terminals):
1316         self.Productions  = [None]  # A list of all of the productions.  The first
1317                                     # entry is always reserved for the purpose of
1318                                     # building an augmented grammar
1319
1320         self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
1321                                     # productions of that nonterminal.
1322
1323         self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
1324                                     # productions.
1325
1326         self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
1327                                     # list of the rules where they are used.
1328
1329         for term in terminals:
1330             self.Terminals[term] = []
1331
1332         self.Terminals['error'] = []
1333
1334         self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
1335                                     # of rule numbers where they are used.
1336
1337         self.First        = { }     # A dictionary of precomputed FIRST(x) symbols
1338
1339         self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols
1340
1341         self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
1342                                     # form ('right',level) or ('nonassoc', level) or ('left',level)
1343
1344         self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammer.
1345                                     # This is only used to provide error checking and to generate
1346                                     # a warning about unused precedence rules.
1347
1348         self.Start = None           # Starting symbol for the grammar
1349
1350
1351     def __len__(self):
1352         return len(self.Productions)
1353
1354     def __getitem__(self,index):
1355         return self.Productions[index]
1356
1357     # -----------------------------------------------------------------------------
1358     # set_precedence()
1359     #
1360     # Sets the precedence for a given terminal. assoc is the associativity such as
1361     # 'left','right', or 'nonassoc'.  level is a numeric level.
1362     #
1363     # -----------------------------------------------------------------------------
1364
1365     def set_precedence(self,term,assoc,level):
1366         assert self.Productions == [None],"Must call set_precedence() before add_production()"
1367         if term in self.Precedence:
1368             raise GrammarError("Precedence already specified for terminal '%s'" % term)
1369         if assoc not in ['left','right','nonassoc']:
1370             raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1371         self.Precedence[term] = (assoc,level)
1372
1373     # -----------------------------------------------------------------------------
1374     # add_production()
1375     #
1376     # Given an action function, this function assembles a production rule and
1377     # computes its precedence level.
1378     #
1379     # The production rule is supplied as a list of symbols.   For example,
1380     # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1381     # symbols ['expr','PLUS','term'].
1382     #
    # Precedence is determined by the precedence of the right-most terminal
    # or the precedence of a terminal specified by %prec.
1385     #
1386     # A variety of error checks are performed to make sure production symbols
1387     # are valid and that %prec is used correctly.
1388     # -----------------------------------------------------------------------------
1389
1390     def add_production(self,prodname,syms,func=None,file='',line=0):
1391
1392         if prodname in self.Terminals:
1393             raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
1394         if prodname == 'error':
1395             raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
1396         if not _is_identifier.match(prodname):
1397             raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
1398
1399         # Look for literal tokens
1400         for n,s in enumerate(syms):
1401             if s[0] in "'\"":
1402                  try:
1403                      c = eval(s)
1404                      if (len(c) > 1):
1405                           raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
1406                      if not c in self.Terminals:
1407                           self.Terminals[c] = []
1408                      syms[n] = c
1409                      continue
1410                  except SyntaxError:
1411                      pass
1412             if not _is_identifier.match(s) and s != '%prec':
1413                 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
1414
1415         # Determine the precedence level
1416         if '%prec' in syms:
1417             if syms[-1] == '%prec':
1418                 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
1419             if syms[-2] != '%prec':
1420                 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
1421             precname = syms[-1]
1422             prodprec = self.Precedence.get(precname,None)
1423             if not prodprec:
1424                 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
1425             else:
1426                 self.UsedPrecedence[precname] = 1
1427             del syms[-2:]     # Drop %prec from the rule
1428         else:
1429             # If no %prec, precedence is determined by the rightmost terminal symbol
1430             precname = rightmost_terminal(syms,self.Terminals)
1431             prodprec = self.Precedence.get(precname,('right',0))
1432
1433         # See if the rule is already in the rulemap
1434         map = "%s -> %s" % (prodname,syms)
1435         if map in self.Prodmap:
1436             m = self.Prodmap[map]
1437             raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
1438                                "Previous definition at %s:%d" % (m.file, m.line))
1439
1440         # From this point on, everything is valid.  Create a new Production instance
1441         pnumber  = len(self.Productions)
1442         if not prodname in self.Nonterminals:
1443             self.Nonterminals[prodname] = [ ]
1444
1445         # Add the production number to Terminals and Nonterminals
1446         for t in syms:
1447             if t in self.Terminals:
1448                 self.Terminals[t].append(pnumber)
1449             else:
1450                 if not t in self.Nonterminals:
1451                     self.Nonterminals[t] = [ ]
1452                 self.Nonterminals[t].append(pnumber)
1453
1454         # Create a production and add it to the list of productions
1455         p = Production(pnumber,prodname,syms,prodprec,func,file,line)
1456         self.Productions.append(p)
1457         self.Prodmap[map] = p
1458
1459         # Add to the global productions list
1460         try:
1461             self.Prodnames[prodname].append(p)
1462         except KeyError:
1463             self.Prodnames[prodname] = [ p ]
1464         return 0
1465
1466     # -----------------------------------------------------------------------------
1467     # set_start()
1468     #
1469     # Sets the starting symbol and creates the augmented grammar.  Production
1470     # rule 0 is S' -> start where start is the start symbol.
1471     # -----------------------------------------------------------------------------
1472
1473     def set_start(self,start=None):
1474         if not start:
1475             start = self.Productions[1].name
1476         if start not in self.Nonterminals:
1477             raise GrammarError("start symbol %s undefined" % start)
1478         self.Productions[0] = Production(0,"S'",[start])
1479         self.Nonterminals[start].append(0)
1480         self.Start = start
1481
1482     # -----------------------------------------------------------------------------
1483     # find_unreachable()
1484     #
1485     # Find all of the nonterminal symbols that can't be reached from the starting
1486     # symbol.  Returns a list of nonterminals that can't be reached.
1487     # -----------------------------------------------------------------------------
1488
1489     def find_unreachable(self):
1490
1491         # Mark all symbols that are reachable from a symbol s
1492         def mark_reachable_from(s):
1493             if reachable[s]:
1494                 # We've already reached symbol s.
1495                 return
1496             reachable[s] = 1
1497             for p in self.Prodnames.get(s,[]):
1498                 for r in p.prod:
1499                     mark_reachable_from(r)
1500
1501         reachable   = { }
1502         for s in list(self.Terminals) + list(self.Nonterminals):
1503             reachable[s] = 0
1504
1505         mark_reachable_from( self.Productions[0].prod[0] )
1506
1507         return [s for s in list(self.Nonterminals)
1508                         if not reachable[s]]
1509
1510     # -----------------------------------------------------------------------------
1511     # infinite_cycles()
1512     #
1513     # This function looks at the various parsing rules and tries to detect
1514     # infinite recursion cycles (grammar rules where there is no possible way
1515     # to derive a string of only terminals).
1516     # -----------------------------------------------------------------------------
1517
1518     def infinite_cycles(self):
1519         terminates = {}
1520
1521         # Terminals:
1522         for t in self.Terminals:
1523             terminates[t] = 1
1524
1525         terminates['$end'] = 1
1526
1527         # Nonterminals:
1528
1529         # Initialize to false:
1530         for n in self.Nonterminals:
1531             terminates[n] = 0
1532
1533         # Then propagate termination until no change:
1534         while 1:
1535             some_change = 0
1536             for (n,pl) in self.Prodnames.items():
1537                 # Nonterminal n terminates iff any of its productions terminates.
1538                 for p in pl:
1539                     # Production p terminates iff all of its rhs symbols terminate.
1540                     for s in p.prod:
1541                         if not terminates[s]:
1542                             # The symbol s does not terminate,
1543                             # so production p does not terminate.
1544                             p_terminates = 0
1545                             break
1546                     else:
1547                         # didn't break from the loop,
1548                         # so every symbol s terminates
1549                         # so production p terminates.
1550                         p_terminates = 1
1551
1552                     if p_terminates:
1553                         # symbol n terminates!
1554                         if not terminates[n]:
1555                             terminates[n] = 1
1556                             some_change = 1
1557                         # Don't need to consider any more productions for this n.
1558                         break
1559
1560             if not some_change:
1561                 break
1562
1563         infinite = []
1564         for (s,term) in terminates.items():
1565             if not term:
1566                 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1567                     # s is used-but-not-defined, and we've already warned of that,
1568                     # so it would be overkill to say that it's also non-terminating.
1569                     pass
1570                 else:
1571                     infinite.append(s)
1572
1573         return infinite
1574
1575
1576     # -----------------------------------------------------------------------------
1577     # undefined_symbols()
1578     #
1579     # Find all symbols that were used in the grammar, but not defined as tokens or
1580     # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
1581     # and prod is the production where the symbol was used.
1582     # -----------------------------------------------------------------------------
1583     def undefined_symbols(self):
1584         result = []
1585         for p in self.Productions:
1586             if not p: continue
1587
1588             for s in p.prod:
1589                 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1590                     result.append((s,p))
1591         return result
1592
1593     # -----------------------------------------------------------------------------
1594     # unused_terminals()
1595     #
1596     # Find all terminals that were defined, but not used by the grammar.  Returns
1597     # a list of all symbols.
1598     # -----------------------------------------------------------------------------
1599     def unused_terminals(self):
1600         unused_tok = []
1601         for s,v in self.Terminals.items():
1602             if s != 'error' and not v:
1603                 unused_tok.append(s)
1604
1605         return unused_tok
1606
1607     # ------------------------------------------------------------------------------
1608     # unused_rules()
1609     #
1610     # Find all grammar rules that were defined,  but not used (maybe not reachable)
1611     # Returns a list of productions.
1612     # ------------------------------------------------------------------------------
1613
1614     def unused_rules(self):
1615         unused_prod = []
1616         for s,v in self.Nonterminals.items():
1617             if not v:
1618                 p = self.Prodnames[s][0]
1619                 unused_prod.append(p)
1620         return unused_prod
1621
1622     # -----------------------------------------------------------------------------
1623     # unused_precedence()
1624     #
1625     # Returns a list of tuples (term,precedence) corresponding to precedence
1626     # rules that were never used by the grammar.  term is the name of the terminal
1627     # on which precedence was applied and precedence is a string such as 'left' or
1628     # 'right' corresponding to the type of precedence.
1629     # -----------------------------------------------------------------------------
1630
1631     def unused_precedence(self):
1632         unused = []
1633         for termname in self.Precedence:
1634             if not (termname in self.Terminals or termname in self.UsedPrecedence):
1635                 unused.append((termname,self.Precedence[termname][0]))
1636
1637         return unused
1638
1639     # -------------------------------------------------------------------------
1640     # _first()
1641     #
1642     # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1643     #
1644     # During execution of compute_first(), the result may be incomplete.
1645     # Afterward (e.g., when called from compute_follow()), it will be complete.
1646     # -------------------------------------------------------------------------
1647     def _first(self,beta):
1648
1649         # We are computing First(x1,x2,x3,...,xn)
1650         result = [ ]
1651         for x in beta:
1652             x_produces_empty = 0
1653
1654             # Add all the non-<empty> symbols of First[x] to the result.
1655             for f in self.First[x]:
1656                 if f == '<empty>':
1657                     x_produces_empty = 1
1658                 else:
1659                     if f not in result: result.append(f)
1660
1661             if x_produces_empty:
1662                 # We have to consider the next x in beta,
1663                 # i.e. stay in the loop.
1664                 pass
1665             else:
1666                 # We don't have to consider any further symbols in beta.
1667                 break
1668         else:
1669             # There was no 'break' from the loop,
1670             # so x_produces_empty was true for all x in beta,
1671             # so beta produces empty as well.
1672             result.append('<empty>')
1673
1674         return result
1675
1676     # -------------------------------------------------------------------------
1677     # compute_first()
1678     #
1679     # Compute the value of FIRST1(X) for all symbols
1680     # -------------------------------------------------------------------------
1681     def compute_first(self):
1682         if self.First:
1683             return self.First
1684
1685         # Terminals:
1686         for t in self.Terminals:
1687             self.First[t] = [t]
1688
1689         self.First['$end'] = ['$end']
1690
1691         # Nonterminals:
1692
1693         # Initialize to the empty set:
1694         for n in self.Nonterminals:
1695             self.First[n] = []
1696
1697         # Then propagate symbols until no change:
1698         while 1:
1699             some_change = 0
1700             for n in self.Nonterminals:
1701                 for p in self.Prodnames[n]:
1702                     for f in self._first(p.prod):
1703                         if f not in self.First[n]:
1704                             self.First[n].append( f )
1705                             some_change = 1
1706             if not some_change:
1707                 break
1708
1709         return self.First
1710
1711     # ---------------------------------------------------------------------
1712     # compute_follow()
1713     #
1714     # Computes all of the follow sets for every non-terminal symbol.  The
1715     # follow set is the set of all symbols that might follow a given
1716     # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
1717     # ---------------------------------------------------------------------
1718     def compute_follow(self,start=None):
1719         # If already computed, return the result
1720         if self.Follow:
1721             return self.Follow
1722
1723         # If first sets not computed yet, do that first.
1724         if not self.First:
1725             self.compute_first()
1726
1727         # Add '$end' to the follow list of the start symbol
1728         for k in self.Nonterminals:
1729             self.Follow[k] = [ ]
1730
1731         if not start:
1732             start = self.Productions[1].name
1733
1734         self.Follow[start] = [ '$end' ]
1735
1736         while 1:
1737             didadd = 0
1738             for p in self.Productions[1:]:
1739                 # Here is the production set
1740                 for i in range(len(p.prod)):
1741                     B = p.prod[i]
1742                     if B in self.Nonterminals:
1743                         # Okay. We got a non-terminal in a production
1744                         fst = self._first(p.prod[i+1:])
1745                         hasempty = 0
1746                         for f in fst:
1747                             if f != '<empty>' and f not in self.Follow[B]:
1748                                 self.Follow[B].append(f)
1749                                 didadd = 1
1750                             if f == '<empty>':
1751                                 hasempty = 1
1752                         if hasempty or i == (len(p.prod)-1):
1753                             # Add elements of follow(a) to follow(b)
1754                             for f in self.Follow[p.name]:
1755                                 if f not in self.Follow[B]:
1756                                     self.Follow[B].append(f)
1757                                     didadd = 1
1758             if not didadd: break
1759         return self.Follow
1760
1761
1762     # -----------------------------------------------------------------------------
1763     # build_lritems()
1764     #
1765     # This function walks the list of productions and builds a complete set of the
1766     # LR items.  The LR items are stored in two ways:  First, they are uniquely
1767     # numbered and placed in the list _lritems.  Second, a linked list of LR items
1768     # is built for each production.  For example:
1769     #
1770     #   E -> E PLUS E
1771     #
1772     # Creates the list
1773     #
1774     #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1775     # -----------------------------------------------------------------------------
1776
1777     def build_lritems(self):
1778         for p in self.Productions:
1779             lastlri = p
1780             i = 0
1781             lr_items = []
1782             while 1:
1783                 if i > len(p):
1784                     lri = None
1785                 else:
1786                     lri = LRItem(p,i)
1787                     # Precompute the list of productions immediately following
1788                     try:
1789                         lri.lr_after = self.Prodnames[lri.prod[i+1]]
1790                     except (IndexError,KeyError):
1791                         lri.lr_after = []
1792                     try:
1793                         lri.lr_before = lri.prod[i-1]
1794                     except IndexError:
1795                         lri.lr_before = None
1796
1797                 lastlri.lr_next = lri
1798                 if not lri: break
1799                 lr_items.append(lri)
1800                 lastlri = lri
1801                 i += 1
1802             p.lr_items = lr_items
1803
1804 # -----------------------------------------------------------------------------
1805 #                            == Class LRTable ==
1806 #
1807 # This basic class represents a basic table of LR parsing information.
1808 # Methods for generating the tables are not defined here.  They are defined
1809 # in the derived class LRGeneratedTable.
1810 # -----------------------------------------------------------------------------
1811
class VersionError(YaccError):
    """Raised when a stored parsing table was written with a different table version."""
1813
class LRTable(object):
    """Holder for LR parsing tables loaded from a module or a pickle file.

    Attributes (all None until a table has been read):
      lr_action      - action table
      lr_goto        - goto table
      lr_productions - list of MiniProduction objects
      lr_method      - table construction method recorded in the table
    """

    def __init__(self):
        self.lr_action = None
        self.lr_goto = None
        self.lr_productions = None
        self.lr_method = None

    def read_table(self,module):
        """Load the tables from a table module and return its signature.

        module is either a module object or the name of a module to import.
        Raises VersionError when the module's _tabversion differs from
        __tabversion__.
        """
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            # NOTE(review): the module name is interpolated into code that is
            # exec'd; it must come from a trusted source.
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = [MiniProduction(*p) for p in parsetab._lr_productions]

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self,filename):
        """Load the tables from a pickle file and return the stored signature.

        The file is expected to hold, in order: table version, method,
        signature, action table, goto table and production list.  Raises
        VersionError when the stored version differs from __tabversion__.
        The file is closed even when loading fails.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        # NOTE(review): unpickling can execute arbitrary code; only table
        # files from a trusted location should be read here.
        in_f = open(filename,"rb")
        try:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError("yacc table file version is out of date")
            self.lr_method = pickle.load(in_f)
            signature      = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto   = pickle.load(in_f)
            productions    = pickle.load(in_f)
        finally:
            # Previously the handle leaked whenever an exception was raised
            in_f.close()

        self.lr_productions = [MiniProduction(*p) for p in productions]
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        """Call bind(pdict) on every entry of lr_productions."""
        for p in self.lr_productions:
            p.bind(pdict)
1873
1874 # -----------------------------------------------------------------------------
1875 #                           === LR Generator ===
1876 #
1877 # The following classes and functions are used to generate LR parsing tables on
1878 # a grammar.
1879 # -----------------------------------------------------------------------------
1880
1881 # -----------------------------------------------------------------------------
1882 # digraph()
1883 # traverse()
1884 #
1885 # The following two functions are used to compute set valued functions
1886 # of the form:
1887 #
1888 #     F(x) = F'(x) U U{F(y) | x R y}
1889 #
1890 # This is used to compute the values of Read() sets as well as FOLLOW sets
1891 # in LALR(1) generation.
1892 #
1893 # Inputs:  X    - An input set
1894 #          R    - A relation
1895 #          FP   - Set-valued function
1896 # ------------------------------------------------------------------------------
1897
def digraph(X,R,FP):
    """Compute F for every element of X and return it as a dictionary.

    X is the collection of elements, R(x) yields the elements related to x
    and FP(x) gives the initial list for x.  Elements are handed to
    traverse() in the order X yields them, skipping those already visited.
    """
    marks = dict.fromkeys(X, 0)     # 0 means "not visited yet"
    pending = []
    result = { }
    for x in X:
        if marks[x] == 0:
            traverse(x,marks,pending,result,X,R,FP)
    return result
1907
def traverse(x,N,stack,F,X,R,FP):
    """Visit x depth-first, merging the F lists of everything x relates to.

    N maps each element to its visit mark (0 = unvisited), stack is the
    current traversal stack and F the result dictionary being filled in.
    X is accepted for symmetry with digraph() and is only passed along.
    """
    stack.append(x)
    depth = len(stack)
    N[x] = depth
    F[x] = FP(x)             # F(x) <- F'(x)

    for y in R(x):           # every y with x R y
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for entry in F.get(y,[]):
            if entry not in F[x]:
                F[x].append(entry)

    if N[x] != depth:
        # x's mark was lowered to that of an earlier element; leave x on the
        # stack so that element's call can finish it off.
        return

    # Pop everything down to and including x.  Each popped element is marked
    # MAXINT and is given the very same list object as x.
    while True:
        member = stack.pop()
        N[member] = MAXINT
        F[member] = F[x]
        if member == x:
            break
1929
class LALRError(YaccError):
    """Raised by the LR table generator, e.g. for an unsupported method name."""
1931
1932 # -----------------------------------------------------------------------------
1933 #                             == LRGeneratedTable ==
1934 #
1935 # This class implements the LR table generation algorithm.  There are no
1936 # public methods except for write()
1937 # -----------------------------------------------------------------------------
1938
1939 class LRGeneratedTable(LRTable):
1940     def __init__(self,grammar,method='LALR',log=None):
1941         if method not in ['SLR','LALR']:
1942             raise LALRError("Unsupported method %s" % method)
1943
1944         self.grammar = grammar
1945         self.lr_method = method
1946
1947         # Set up the logger
1948         if not log:
1949             log = NullLogger()
1950         self.log = log
1951
1952         # Internal attributes
1953         self.lr_action     = {}        # Action table
1954         self.lr_goto       = {}        # Goto table
1955         self.lr_productions  = grammar.Productions    # Copy of grammar Production array
1956         self.lr_goto_cache = {}        # Cache of computed gotos
1957         self.lr0_cidhash   = {}        # Cache of closures
1958
1959         self._add_count    = 0         # Internal counter used to detect cycles
1960
1961         # Diagonistic information filled in by the table generator
1962         self.sr_conflict   = 0
1963         self.rr_conflict   = 0
1964         self.conflicts     = []        # List of conflicts
1965
1966         self.sr_conflicts  = []
1967         self.rr_conflicts  = []
1968
1969         # Build the tables
1970         self.grammar.build_lritems()
1971         self.grammar.compute_first()
1972         self.grammar.compute_follow()
1973         self.lr_parse_table()
1974
1975     # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1976
1977     def lr0_closure(self,I):
1978         self._add_count += 1
1979
1980         # Add everything in I to J
1981         J = I[:]
1982         didadd = 1
1983         while didadd:
1984             didadd = 0
1985             for j in J:
1986                 for x in j.lr_after:
1987                     if getattr(x,"lr0_added",0) == self._add_count: continue
1988                     # Add B --> .G to J
1989                     J.append(x.lr_next)
1990                     x.lr0_added = self._add_count
1991                     didadd = 1
1992
1993         return J
1994
1995     # Compute the LR(0) goto function goto(I,X) where I is a set
1996     # of LR(0) items and X is a grammar symbol.   This function is written
1997     # in a way that guarantees uniqueness of the generated goto sets
1998     # (i.e. the same goto set will never be returned as two different Python
1999     # objects).  With uniqueness, we can later do fast set comparisons using
2000     # id(obj) instead of element-wise comparison.
2001
2002     def lr0_goto(self,I,x):
2003         # First we look for a previously cached entry
2004         g = self.lr_goto_cache.get((id(I),x),None)
2005         if g: return g
2006
2007         # Now we generate the goto set in a way that guarantees uniqueness
2008         # of the result
2009
2010         s = self.lr_goto_cache.get(x,None)
2011         if not s:
2012             s = { }
2013             self.lr_goto_cache[x] = s
2014
2015         gs = [ ]
2016         for p in I:
2017             n = p.lr_next
2018             if n and n.lr_before == x:
2019                 s1 = s.get(id(n),None)
2020                 if not s1:
2021                     s1 = { }
2022                     s[id(n)] = s1
2023                 gs.append(n)
2024                 s = s1
2025         g = s.get('$end',None)
2026         if not g:
2027             if gs:
2028                 g = self.lr0_closure(gs)
2029                 s['$end'] = g
2030             else:
2031                 s['$end'] = gs
2032         self.lr_goto_cache[(id(I),x)] = g
2033         return g
2034
2035     # Compute the LR(0) sets of item function
2036     def lr0_items(self):
2037
2038         C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
2039         i = 0
2040         for I in C:
2041             self.lr0_cidhash[id(I)] = i
2042             i += 1
2043
2044         # Loop over the items in C and each grammar symbols
2045         i = 0
2046         while i < len(C):
2047             I = C[i]
2048             i += 1
2049
2050             # Collect all of the symbols that could possibly be in the goto(I,X) sets
2051             asyms = { }
2052             for ii in I:
2053                 for s in ii.usyms:
2054                     asyms[s] = None
2055
2056             for x in asyms:
2057                 g = self.lr0_goto(I,x)
2058                 if not g:  continue
2059                 if id(g) in self.lr0_cidhash: continue
2060                 self.lr0_cidhash[id(g)] = len(C)
2061                 C.append(g)
2062
2063         return C
2064
2065     # -----------------------------------------------------------------------------
2066     #                       ==== LALR(1) Parsing ====
2067     #
2068     # LALR(1) parsing is almost exactly the same as SLR except that instead of
2069     # relying upon Follow() sets when performing reductions, a more selective
2070     # lookahead set that incorporates the state of the LR(0) machine is utilized.
2071     # Thus, we mainly just have to focus on calculating the lookahead sets.
2072     #
2073     # The method used here is due to DeRemer and Pennello (1982).
2074     #
2075     # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2076     #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2077     #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
2078     #
2079     # Further details can also be found in:
2080     #
2081     #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2082     #      McGraw-Hill Book Company, (1985).
2083     #
2084     # -----------------------------------------------------------------------------
2085
2086     # -----------------------------------------------------------------------------
2087     # compute_nullable_nonterminals()
2088     #
2089     # Creates a dictionary containing all of the non-terminals that might produce
2090     # an empty production.
2091     # -----------------------------------------------------------------------------
2092
2093     def compute_nullable_nonterminals(self):
2094         nullable = {}
2095         num_nullable = 0
2096         while 1:
2097            for p in self.grammar.Productions[1:]:
2098                if p.len == 0:
2099                     nullable[p.name] = 1
2100                     continue
2101                for t in p.prod:
2102                     if not t in nullable: break
2103                else:
2104                     nullable[p.name] = 1
2105            if len(nullable) == num_nullable: break
2106            num_nullable = len(nullable)
2107         return nullable
2108
2109     # -----------------------------------------------------------------------------
2110     # find_nonterminal_transitions(C)
2111     #
2112     # Given a set of LR(0) items, this function finds all of the non-terminal
2113     # transitions.    These are transitions in which a dot appears immediately before
2114     # a non-terminal.   Returns a list of tuples of the form (state,N) where state
2115     # is the state number and N is the nonterminal symbol.
2116     #
2117     # The input C is the set of LR(0) items.
2118     # -----------------------------------------------------------------------------
2119
2120     def find_nonterminal_transitions(self,C):
2121          trans = []
2122          for state in range(len(C)):
2123              for p in C[state]:
2124                  if p.lr_index < p.len - 1:
2125                       t = (state,p.prod[p.lr_index+1])
2126                       if t[1] in self.grammar.Nonterminals:
2127                             if t not in trans: trans.append(t)
2128              state = state + 1
2129          return trans
2130
2131     # -----------------------------------------------------------------------------
2132     # dr_relation()
2133     #
2134     # Computes the DR(p,A) relationships for non-terminal transitions.  The input
2135     # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2136     #
2137     # Returns a list of terminals.
2138     # -----------------------------------------------------------------------------
2139
2140     def dr_relation(self,C,trans,nullable):
2141         dr_set = { }
2142         state,N = trans
2143         terms = []
2144
2145         g = self.lr0_goto(C[state],N)
2146         for p in g:
2147            if p.lr_index < p.len - 1:
2148                a = p.prod[p.lr_index+1]
2149                if a in self.grammar.Terminals:
2150                    if a not in terms: terms.append(a)
2151
2152         # This extra bit is to handle the start state
2153         if state == 0 and N == self.grammar.Productions[0].prod[0]:
2154            terms.append('$end')
2155
2156         return terms
2157
2158     # -----------------------------------------------------------------------------
2159     # reads_relation()
2160     #
2161     # Computes the READS() relation (p,A) READS (t,C).
2162     # -----------------------------------------------------------------------------
2163
2164     def reads_relation(self,C, trans, empty):
2165         # Look for empty transitions
2166         rel = []
2167         state, N = trans
2168
2169         g = self.lr0_goto(C[state],N)
2170         j = self.lr0_cidhash.get(id(g),-1)
2171         for p in g:
2172             if p.lr_index < p.len - 1:
2173                  a = p.prod[p.lr_index + 1]
2174                  if a in empty:
2175                       rel.append((j,a))
2176
2177         return rel
2178
2179     # -----------------------------------------------------------------------------
2180     # compute_lookback_includes()
2181     #
2182     # Determines the lookback and includes relations
2183     #
2184     # LOOKBACK:
2185     #
2186     # This relation is determined by running the LR(0) state machine forward.
2187     # For example, starting with a production "N : . A B C", we run it forward
2188     # to obtain "N : A B C ."   We then build a relationship between this final
2189     # state and the starting state.   These relationships are stored in a dictionary
2190     # lookdict.
2191     #
2192     # INCLUDES:
2193     #
2194     # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2195     #
2196     # This relation is used to determine non-terminal transitions that occur
2197     # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
2198     # if the following holds:
2199     #
2200     #       B -> LAT, where T -> epsilon and p' -L-> p
2201     #
2202     # L is essentially a prefix (which may be empty), T is a suffix that must be
2203     # able to derive an empty string.  State p' must lead to state p with the string L.
2204     #
2205     # -----------------------------------------------------------------------------
2206
2207     def compute_lookback_includes(self,C,trans,nullable):
2208
2209         lookdict = {}          # Dictionary of lookback relations
2210         includedict = {}       # Dictionary of include relations
2211
2212         # Make a dictionary of non-terminal transitions
2213         dtrans = {}
2214         for t in trans:
2215             dtrans[t] = 1
2216
2217         # Loop over all transitions and compute lookbacks and includes
2218         for state,N in trans:
2219             lookb = []
2220             includes = []
2221             for p in C[state]:
2222                 if p.name != N: continue
2223
2224                 # Okay, we have a name match.  We now follow the production all the way
2225                 # through the state machine until we get the . on the right hand side
2226
2227                 lr_index = p.lr_index
2228                 j = state
2229                 while lr_index < p.len - 1:
2230                      lr_index = lr_index + 1
2231                      t = p.prod[lr_index]
2232
2233                      # Check to see if this symbol and state are a non-terminal transition
2234                      if (j,t) in dtrans:
2235                            # Yes.  Okay, there is some chance that this is an includes relation
2236                            # the only way to know for certain is whether the rest of the
2237                            # production derives empty
2238
2239                            li = lr_index + 1
2240                            while li < p.len:
2241                                 if p.prod[li] in self.grammar.Terminals: break      # No forget it
2242                                 if not p.prod[li] in nullable: break
2243                                 li = li + 1
2244                            else:
2245                                 # Appears to be a relation between (j,t) and (state,N)
2246                                 includes.append((j,t))
2247
2248                      g = self.lr0_goto(C[j],t)               # Go to next set
2249                      j = self.lr0_cidhash.get(id(g),-1)     # Go to next state
2250
2251                 # When we get here, j is the final state, now we have to locate the production
2252                 for r in C[j]:
2253                      if r.name != p.name: continue
2254                      if r.len != p.len:   continue
2255                      i = 0
2256                      # This look is comparing a production ". A B C" with "A B C ."
2257                      while i < r.lr_index:
2258                           if r.prod[i] != p.prod[i+1]: break
2259                           i = i + 1
2260                      else:
2261                           lookb.append((j,r))
2262             for i in includes:
2263                  if not i in includedict: includedict[i] = []
2264                  includedict[i].append((state,N))
2265             lookdict[(state,N)] = lookb
2266
2267         return lookdict,includedict
2268
2269     # -----------------------------------------------------------------------------
2270     # compute_read_sets()
2271     #
2272     # Given a set of LR(0) items, this function computes the read sets.
2273     #
2274     # Inputs:  C        =  Set of LR(0) items
2275     #          ntrans   = Set of nonterminal transitions
2276     #          nullable = Set of empty transitions
2277     #
2278     # Returns a set containing the read sets
2279     # -----------------------------------------------------------------------------
2280
2281     def compute_read_sets(self,C, ntrans, nullable):
2282         FP = lambda x: self.dr_relation(C,x,nullable)
2283         R =  lambda x: self.reads_relation(C,x,nullable)
2284         F = digraph(ntrans,R,FP)
2285         return F
2286
2287     # -----------------------------------------------------------------------------
2288     # compute_follow_sets()
2289     #
2290     # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2291     # and an include set, this function computes the follow sets
2292     #
2293     # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2294     #
2295     # Inputs:
2296     #            ntrans     = Set of nonterminal transitions
2297     #            readsets   = Readset (previously computed)
2298     #            inclsets   = Include sets (previously computed)
2299     #
2300     # Returns a set containing the follow sets
2301     # -----------------------------------------------------------------------------
2302
2303     def compute_follow_sets(self,ntrans,readsets,inclsets):
2304          FP = lambda x: readsets[x]
2305          R  = lambda x: inclsets.get(x,[])
2306          F = digraph(ntrans,R,FP)
2307          return F
2308
2309     # -----------------------------------------------------------------------------
2310     # add_lookaheads()
2311     #
2312     # Attaches the lookahead symbols to grammar rules.
2313     #
2314     # Inputs:    lookbacks         -  Set of lookback relations
2315     #            followset         -  Computed follow set
2316     #
2317     # This function directly attaches the lookaheads to productions contained
2318     # in the lookbacks set
2319     # -----------------------------------------------------------------------------
2320
2321     def add_lookaheads(self,lookbacks,followset):
2322         for trans,lb in lookbacks.items():
2323             # Loop over productions in lookback
2324             for state,p in lb:
2325                  if not state in p.lookaheads:
2326                       p.lookaheads[state] = []
2327                  f = followset.get(trans,[])
2328                  for a in f:
2329                       if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2330
2331     # -----------------------------------------------------------------------------
2332     # add_lalr_lookaheads()
2333     #
2334     # This function does all of the work of adding lookahead information for use
2335     # with LALR parsing
2336     # -----------------------------------------------------------------------------
2337
2338     def add_lalr_lookaheads(self,C):
2339         # Determine all of the nullable nonterminals
2340         nullable = self.compute_nullable_nonterminals()
2341
2342         # Find all non-terminal transitions
2343         trans = self.find_nonterminal_transitions(C)
2344
2345         # Compute read sets
2346         readsets = self.compute_read_sets(C,trans,nullable)
2347
2348         # Compute lookback/includes relations
2349         lookd, included = self.compute_lookback_includes(C,trans,nullable)
2350
2351         # Compute LALR FOLLOW sets
2352         followsets = self.compute_follow_sets(trans,readsets,included)
2353
2354         # Add all of the lookaheads
2355         self.add_lookaheads(lookd,followsets)
2356
2357     # -----------------------------------------------------------------------------
2358     # lr_parse_table()
2359     #
2360     # This function constructs the parse tables for SLR or LALR
2361     # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Fill ``self.lr_action`` and ``self.lr_goto``, one entry per state.

        The LR(0) item sets are obtained from ``self.lr0_items()``; when
        ``self.lr_method`` is ``'LALR'`` lookaheads are attached first,
        otherwise reductions use ``self.grammar.Follow``.

        Values stored in a state's action dictionary, keyed by terminal:

        * ``0``             - accept (stored for ``"$end"`` on the ``S'`` item)
        * ``j`` (positive)  - shift and go to state ``j``
        * ``-n`` (negative) - reduce using production number ``n``
        * ``None``          - stored when shift and reduce have equal
                              precedence level and the rule is ``'nonassoc'``

        Shift/reduce conflicts are resolved with the precedence tables and
        recorded in ``self.sr_conflicts`` when they were settled by default
        rather than by a declared precedence.  Reduce/reduce conflicts keep
        the production with the smaller ``line`` and are recorded in
        ``self.rr_conflicts``.  The ``reduced`` counter of each production is
        kept in step with the reductions that end up in the table.

        Raises LALRError for a shift/shift conflict or for an existing action
        that is neither a shift nor a reduce.
        """
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action       # Action array
        log    = self.log             # Logger for output

        # NOTE: actionp is filled in below but never read again in this method
        actionp = { }                 # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]              # List of actions
            st_action  = { }           # terminal -> action code for this state
            st_actionp = { }           # terminal -> item that produced the action
            st_goto    = { }           # nonterminal -> target state
            log.info("")
            log.info("state %d", st)
            log.info("")
            for p in I:
                log.info("    (%d) %s", p.number, str(p))
            log.info("")

            for p in I:
                    # The dot is at the right-hand end: accept or reduce
                    if p.len == p.lr_index + 1:
                        if p.name == "S'":
                            # Start symbol. Accept!
                            st_action["$end"] = 0
                            st_actionp["$end"] = p
                        else:
                            # We are at the end of a production.  Reduce!
                            if self.lr_method == 'LALR':
                                laheads = p.lookaheads[st]
                            else:
                                laheads = self.grammar.Follow[p.name]
                            for a in laheads:
                                actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                                r = st_action.get(a,None)
                                if r is not None:
                                    # Whoa. Have a shift/reduce or reduce/reduce conflict
                                    if r > 0:
                                        # Need to decide on shift or reduce here
                                        # By default we favor shifting. Need to add
                                        # some precedence rules here.
                                        sprec,slevel = Productions[st_actionp[a].number].prec
                                        rprec,rlevel = Precedence.get(a,('right',0))
                                        if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                            # We really need to reduce here.
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            if not slevel and not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                                self.sr_conflicts.append((st,a,'reduce'))
                                            Productions[p.number].reduced += 1
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the shift
                                            if not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                                self.sr_conflicts.append((st,a,'shift'))
                                    elif r < 0:
                                        # Reduce/reduce conflict.   In this case, we favor the rule
                                        # that was defined first in the grammar file
                                        oldp = Productions[-r]
                                        pp = Productions[p.number]
                                        if oldp.line > pp.line:
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            chosenp,rejectp = pp,oldp
                                            # Move the reduction count from the old rule to the new one
                                            Productions[p.number].reduced += 1
                                            Productions[oldp.number].reduced -= 1
                                        else:
                                            chosenp,rejectp = oldp,pp
                                        self.rr_conflicts.append((st,chosenp,rejectp))
                                        log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                    else:
                                        # r == 0 (an accept already stored for this terminal) lands here
                                        raise LALRError("Unknown conflict in state %d" % st)
                                else:
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    Productions[p.number].reduced += 1
                    else:
                        i = p.lr_index
                        a = p.prod[i+1]       # Get symbol right after the "."
                        if a in self.grammar.Terminals:
                            g = self.lr0_goto(I,a)
                            j = self.lr0_cidhash.get(id(g),-1)
                            if j >= 0:
                                # We are in a shift state
                                actlist.append((a,p,"shift and go to state %d" % j))
                                r = st_action.get(a,None)
                                if r is not None:
                                    # Whoa have a shift/reduce or shift/shift conflict
                                    if r > 0:
                                        # Shifting to the same state twice is harmless
                                        if r != j:
                                            raise LALRError("Shift/shift conflict in state %d" % st)
                                    elif r < 0:
                                        # Do a precedence check.
                                        #   -  if precedence of reduce rule is higher, we reduce.
                                        #   -  if precedence of reduce is same and left assoc, we reduce.
                                        #   -  otherwise we shift
                                        rprec,rlevel = Productions[st_actionp[a].number].prec
                                        sprec,slevel = Precedence.get(a,('right',0))
                                        if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                            # We decide to shift here... highest precedence to shift
                                            # The displaced reduction no longer counts as used
                                            Productions[st_actionp[a].number].reduced -= 1
                                            st_action[a] = j
                                            st_actionp[a] = p
                                            if not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                                self.sr_conflicts.append((st,a,'shift'))
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the reduce
                                            if not slevel and not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                                self.sr_conflicts.append((st,a,'reduce'))

                                    else:
                                        raise LALRError("Unknown conflict in state %d" % st)
                                else:
                                    st_action[a] = j
                                    st_actionp[a] = p

            # Print the actions associated with each terminal
            # (_actprint remembers which (terminal, message) pairs were already logged)
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1
            log.info("")
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            not_used = 1
                            _actprint[(a,m)] = 1
            if not_used:
                log.debug("")

            # Construct the goto table for this state

            # Collect every nonterminal mentioned by the items of this state
            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
2537
2538
2539     # -----------------------------------------------------------------------------
    # write_table()
2541     #
2542     # This function writes the LR parsing tables to a file
2543     # -----------------------------------------------------------------------------
2544
2545     def write_table(self,modulename,outputdir='',signature=""):
2546         basemodulename = modulename.split(".")[-1]
2547         filename = os.path.join(outputdir,basemodulename) + ".py"
2548         try:
2549             f = open(filename,"w")
2550
2551             f.write("""
2552 # %s
2553 # This file is automatically generated. Do not edit.
2554 _tabversion = %r
2555
2556 _lr_method = %r
2557
2558 _lr_signature = %r
2559     """ % (filename, __tabversion__, self.lr_method, signature))
2560
2561             # Change smaller to 0 to go back to original tables
2562             smaller = 1
2563
2564             # Factor out names to try and make smaller
2565             if smaller:
2566                 items = { }
2567
2568                 for s,nd in self.lr_action.items():
2569                    for name,v in nd.items():
2570                       i = items.get(name)
2571                       if not i:
2572                          i = ([],[])
2573                          items[name] = i
2574                       i[0].append(s)
2575                       i[1].append(v)
2576
2577                 f.write("\n_lr_action_items = {")
2578                 for k,v in items.items():
2579                     f.write("%r:([" % k)
2580                     for i in v[0]:
2581                         f.write("%r," % i)
2582                     f.write("],[")
2583                     for i in v[1]:
2584                         f.write("%r," % i)
2585
2586                     f.write("]),")
2587                 f.write("}\n")
2588
2589                 f.write("""
2590 _lr_action = { }
2591 for _k, _v in _lr_action_items.items():
2592    for _x,_y in zip(_v[0],_v[1]):
2593       if not _x in _lr_action:  _lr_action[_x] = { }
2594       _lr_action[_x][_k] = _y
2595 del _lr_action_items
2596 """)
2597
2598             else:
2599                 f.write("\n_lr_action = { ");
2600                 for k,v in self.lr_action.items():
2601                     f.write("(%r,%r):%r," % (k[0],k[1],v))
2602                 f.write("}\n");
2603
2604             if smaller:
2605                 # Factor out names to try and make smaller
2606                 items = { }
2607
2608                 for s,nd in self.lr_goto.items():
2609                    for name,v in nd.items():
2610                       i = items.get(name)
2611                       if not i:
2612                          i = ([],[])
2613                          items[name] = i
2614                       i[0].append(s)
2615                       i[1].append(v)
2616
2617                 f.write("\n_lr_goto_items = {")
2618                 for k,v in items.items():
2619                     f.write("%r:([" % k)
2620                     for i in v[0]:
2621                         f.write("%r," % i)
2622                     f.write("],[")
2623                     for i in v[1]:
2624                         f.write("%r," % i)
2625
2626                     f.write("]),")
2627                 f.write("}\n")
2628
2629                 f.write("""
2630 _lr_goto = { }
2631 for _k, _v in _lr_goto_items.items():
2632    for _x,_y in zip(_v[0],_v[1]):
2633        if not _x in _lr_goto: _lr_goto[_x] = { }
2634        _lr_goto[_x][_k] = _y
2635 del _lr_goto_items
2636 """)
2637             else:
2638                 f.write("\n_lr_goto = { ");
2639                 for k,v in self.lr_goto.items():
2640                     f.write("(%r,%r):%r," % (k[0],k[1],v))
2641                 f.write("}\n");
2642
2643             # Write production table
2644             f.write("_lr_productions = [\n")
2645             for p in self.lr_productions:
2646                 if p.func:
2647                     f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
2648                 else:
2649                     f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
2650             f.write("]\n")
2651             f.close()
2652
2653         except IOError:
2654             e = sys.exc_info()[1]
2655             sys.stderr.write("Unable to create '%s'\n" % filename)
2656             sys.stderr.write(str(e)+"\n")
2657             return
2658
2659
2660     # -----------------------------------------------------------------------------
2661     # pickle_table()
2662     #
    # This function pickles the LR parsing tables to the file named by `filename`
2664     # -----------------------------------------------------------------------------
2665
2666     def pickle_table(self,filename,signature=""):
2667         try:
2668             import cPickle as pickle
2669         except ImportError:
2670             import pickle
2671         outf = open(filename,"wb")
2672         pickle.dump(__tabversion__,outf,pickle_protocol)
2673         pickle.dump(self.lr_method,outf,pickle_protocol)
2674         pickle.dump(signature,outf,pickle_protocol)
2675         pickle.dump(self.lr_action,outf,pickle_protocol)
2676         pickle.dump(self.lr_goto,outf,pickle_protocol)
2677
2678         outp = []
2679         for p in self.lr_productions:
2680             if p.func:
2681                 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
2682             else:
2683                 outp.append((str(p),p.name,p.len,None,None,None))
2684         pickle.dump(outp,outf,pickle_protocol)
2685         outf.close()
2686
2687 # -----------------------------------------------------------------------------
2688 #                            === INTROSPECTION ===
2689 #
2690 # The following functions and classes are used to implement the PLY
2691 # introspection features followed by the yacc() function itself.
2692 # -----------------------------------------------------------------------------
2693
2694 # -----------------------------------------------------------------------------
2695 # get_caller_module_dict()
2696 #
2697 # This function returns a dictionary containing all of the symbols defined within
2698 # a caller further down the call stack.  This is used to get the environment
2699 # associated with the yacc() call if none was provided.
2700 # -----------------------------------------------------------------------------
2701
def get_caller_module_dict(levels):
    """Return a copy of a calling frame's globals, overlaid with its locals.

    ``levels`` is the number of ``f_back`` steps to take from the frame in
    which the exception below is caught.  The returned dictionary is a new
    object; the frame's own dictionaries are not modified.
    """
    try:
        raise RuntimeError
    except RuntimeError:
        # Raise-and-catch is used only to obtain a traceback, and through it
        # a frame object to start walking from.
        e,b,t = sys.exc_info()
        f = t.tb_frame
        while levels > 0:
            f = f.f_back
            levels -= 1
        ldict = f.f_globals.copy()
        # Overlay the locals only when they differ from the globals
        if f.f_globals != f.f_locals:
            ldict.update(f.f_locals)

        return ldict
2716
2717 # -----------------------------------------------------------------------------
2718 # parse_grammar()
2719 #
2720 # This takes a raw grammar rule string and parses it into production data
2721 # -----------------------------------------------------------------------------
def parse_grammar(doc,file,line):
    """Parse a grammar-rule docstring into production tuples.

    ``doc`` is split into lines; blank lines are skipped but still counted.
    Every other line is either ``name : sym ...`` (``::=`` is accepted in
    place of ``:``) or ``| sym ...``, the latter continuing the most recent
    rule name.  Returns a list of ``(file, lineno, name, symbols)`` tuples,
    where ``lineno`` is ``line`` plus the 1-based position of the rule's line
    within ``doc`` and ``symbols`` is a list of strings.

    Raises SyntaxError for a ``|`` with no preceding rule, for a separator
    other than ``:``/``::=``, and for any other malformed line.
    """
    productions = []
    current_name = None
    for offset, text in enumerate(doc.splitlines()):
        dline = line + offset + 1
        fields = text.split()
        if not fields:
            continue
        try:
            if fields[0] == '|':
                # Alternative for the rule named on an earlier line
                if not current_name:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
                prodname = current_name
                syms = fields[1:]
            else:
                prodname = current_name = fields[0]
                # A lone rule name makes fields[1] raise IndexError, which is
                # reported by the generic handler below.
                if fields[1] not in (':', '::='):
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
                syms = fields[2:]

            productions.append((file,dline,prodname,syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,text.strip()))

    return productions
2754
2755 # -----------------------------------------------------------------------------
2756 # ParserReflect()
2757 #
2758 # This class represents information extracted for building a parser including
2759 # start symbol, error function, tokens, precedence list, action functions,
2760 # etc.
2761 # -----------------------------------------------------------------------------
2762 class ParserReflect(object):
2763     def __init__(self,pdict,log=None):
2764         self.pdict      = pdict
2765         self.start      = None
2766         self.error_func = None
2767         self.tokens     = None
2768         self.files      = {}
2769         self.grammar    = []
2770         self.error      = 0
2771
2772         if log is None:
2773             self.log = PlyLogger(sys.stderr)
2774         else:
2775             self.log = log
2776
2777     # Get all of the basic information
2778     def get_all(self):
2779         self.get_start()
2780         self.get_error_func()
2781         self.get_tokens()
2782         self.get_precedence()
2783         self.get_pfunctions()
2784
2785     # Validate all of the information
2786     def validate_all(self):
2787         self.validate_start()
2788         self.validate_error_func()
2789         self.validate_tokens()
2790         self.validate_precedence()
2791         self.validate_pfunctions()
2792         self.validate_files()
2793         return self.error
2794
2795     # Compute a signature over the grammar
2796     def signature(self):
2797         try:
2798             from hashlib import md5
2799         except ImportError:
2800             from md5 import md5
2801         try:
2802             sig = md5()
2803             if self.start:
2804                 sig.update(self.start.encode('latin-1'))
2805             if self.prec:
2806                 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
2807             if self.tokens:
2808                 sig.update(" ".join(self.tokens).encode('latin-1'))
2809             for f in self.pfuncs:
2810                 if f[3]:
2811                     sig.update(f[3].encode('latin-1'))
2812         except (TypeError,ValueError):
2813             pass
2814         return sig.digest()
2815
2816     # -----------------------------------------------------------------------------
    # validate_files()
2818     #
2819     # This method checks to see if there are duplicated p_rulename() functions
2820     # in the parser module file.  Without this function, it is really easy for
2821     # users to make mistakes by cutting and pasting code fragments (and it's a real
2822     # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
2823     # we just do a little regular expression pattern matching of def statements
2824     # to try and detect duplicates.
2825     # -----------------------------------------------------------------------------
2826
2827     def validate_files(self):
2828         # Match def p_funcname(
2829         fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2830
2831         for filename in self.files.keys():
2832             base,ext = os.path.splitext(filename)
2833             if ext != '.py': return 1          # No idea. Assume it's okay.
2834
2835             try:
2836                 f = open(filename)
2837                 lines = f.readlines()
2838                 f.close()
2839             except IOError:
2840                 continue
2841
2842             counthash = { }
2843             for linen,l in enumerate(lines):
2844                 linen += 1
2845                 m = fre.match(l)
2846                 if m:
2847                     name = m.group(1)
2848                     prev = counthash.get(name)
2849                     if not prev:
2850                         counthash[name] = linen
2851                     else:
2852                         self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2853
2854     # Get the start symbol
2855     def get_start(self):
2856         self.start = self.pdict.get('start')
2857
2858     # Validate the start symbol
2859     def validate_start(self):
2860         if self.start is not None:
2861             if not isinstance(self.start,str):
2862                 self.log.error("'start' must be a string")
2863
2864     # Look for error handler
2865     def get_error_func(self):
2866         self.error_func = self.pdict.get('p_error')
2867
2868     # Validate the error function
2869     def validate_error_func(self):
2870         if self.error_func:
2871             if isinstance(self.error_func,types.FunctionType):
2872                 ismethod = 0
2873             elif isinstance(self.error_func, types.MethodType):
2874                 ismethod = 1
2875             else:
2876                 self.log.error("'p_error' defined, but is not a function or method")
2877                 self.error = 1
2878                 return
2879
2880             eline = func_code(self.error_func).co_firstlineno
2881             efile = func_code(self.error_func).co_filename
2882             self.files[efile] = 1
2883
2884             if (func_code(self.error_func).co_argcount != 1+ismethod):
2885                 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
2886                 self.error = 1
2887
2888     # Get the tokens map
2889     def get_tokens(self):
2890         tokens = self.pdict.get("tokens",None)
2891         if not tokens:
2892             self.log.error("No token list is defined")
2893             self.error = 1
2894             return
2895
2896         if not isinstance(tokens,(list, tuple)):
2897             self.log.error("tokens must be a list or tuple")
2898             self.error = 1
2899             return
2900
2901         if not tokens:
2902             self.log.error("tokens is empty")
2903             self.error = 1
2904             return
2905
2906         self.tokens = tokens
2907
2908     # Validate the tokens
2909     def validate_tokens(self):
2910         # Validate the tokens.
2911         if 'error' in self.tokens:
2912             self.log.error("Illegal token name 'error'. Is a reserved word")
2913             self.error = 1
2914             return
2915
2916         terminals = {}
2917         for n in self.tokens:
2918             if n in terminals:
2919                 self.log.warning("Token '%s' multiply defined", n)
2920             terminals[n] = 1
2921
2922     # Get the precedence map (if any)
2923     def get_precedence(self):
2924         self.prec = self.pdict.get("precedence",None)
2925
2926     # Validate and parse the precedence map
2927     def validate_precedence(self):
2928         preclist = []
2929         if self.prec:
2930             if not isinstance(self.prec,(list,tuple)):
2931                 self.log.error("precedence must be a list or tuple")
2932                 self.error = 1
2933                 return
2934             for level,p in enumerate(self.prec):
2935                 if not isinstance(p,(list,tuple)):
2936                     self.log.error("Bad precedence table")
2937                     self.error = 1
2938                     return
2939
2940                 if len(p) < 2:
2941                     self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
2942                     self.error = 1
2943                     return
2944                 assoc = p[0]
2945                 if not isinstance(assoc,str):
2946                     self.log.error("precedence associativity must be a string")
2947                     self.error = 1
2948                     return
2949                 for term in p[1:]:
2950                     if not isinstance(term,str):
2951                         self.log.error("precedence items must be strings")
2952                         self.error = 1
2953                         return
2954                     preclist.append((term,assoc,level+1))
2955         self.preclist = preclist
2956
2957     # Get all p_functions from the grammar
2958     def get_pfunctions(self):
2959         p_functions = []
2960         for name, item in self.pdict.items():
2961             if name[:2] != 'p_': continue
2962             if name == 'p_error': continue
2963             if isinstance(item,(types.FunctionType,types.MethodType)):
2964                 line = func_code(item).co_firstlineno
2965                 file = func_code(item).co_filename
2966                 p_functions.append((line,file,name,item.__doc__))
2967
2968         # Sort all of the actions by line number
2969         p_functions.sort()
2970         self.pfuncs = p_functions
2971
2972
2973     # Validate all of the p_functions
2974     def validate_pfunctions(self):
2975         grammar = []
2976         # Check for non-empty symbols
2977         if len(self.pfuncs) == 0:
2978             self.log.error("no rules of the form p_rulename are defined")
2979             self.error = 1
2980             return
2981
2982         for line, file, name, doc in self.pfuncs:
2983             func = self.pdict[name]
2984             if isinstance(func, types.MethodType):
2985                 reqargs = 2
2986             else:
2987                 reqargs = 1
2988             if func_code(func).co_argcount > reqargs:
2989                 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
2990                 self.error = 1
2991             elif func_code(func).co_argcount < reqargs:
2992                 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
2993                 self.error = 1
2994             elif not func.__doc__:
2995                 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
2996             else:
2997                 try:
2998                     parsed_g = parse_grammar(doc,file,line)
2999                     for g in parsed_g:
3000                         grammar.append((name, g))
3001                 except SyntaxError:
3002                     e = sys.exc_info()[1]
3003                     self.log.error(str(e))
3004                     self.error = 1
3005
3006                 # Looks like a valid grammar rule
3007                 # Mark the file in which defined.
3008                 self.files[file] = 1
3009
3010         # Secondary validation step that looks for p_ definitions that are not functions
3011         # or functions that look like they might be grammar rules.
3012
3013         for n,v in self.pdict.items():
3014             if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
3015             if n[0:2] == 't_': continue
3016             if n[0:2] == 'p_' and n != 'p_error':
3017                 self.log.warning("'%s' not defined as a function", n)
3018             if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
3019                 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
3020                 try:
3021                     doc = v.__doc__.split(" ")
3022                     if doc[1] == ':':
3023                         self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
3024                                          func_code(v).co_filename, func_code(v).co_firstlineno,n)
3025                 except Exception:
3026                     pass
3027
3028         self.grammar = grammar
3029
3030 # -----------------------------------------------------------------------------
3031 # yacc(module)
3032 #
3033 # Build a parser
3034 # -----------------------------------------------------------------------------
3035
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
         debuglog=None, errorlog = None, picklefile=None):
    """Build a parser from the grammar rules found in a module or in the caller.

    Previously generated tables are tried first (from ``picklefile`` when
    given, otherwise from ``tabmodule``). They are used when ``optimize`` is
    true or when their stored signature matches the current grammar
    signature. Otherwise the grammar is validated and new tables are
    generated.

    Parameters:
        method: table construction method handed to ``LRGeneratedTable``.
        debug: when true, write grammar and conflict information to the debug
            log and report conflict counts on the error log.
        module: object whose attributes supply the grammar definitions. When
            falsy, the dictionary returned by ``get_caller_module_dict(2)``
            is used instead.
        tabmodule: table module name passed to ``read_table``/``write_table``.
        start: start symbol; overrides the ``start`` found by reflection.
        check_recursion: when true, report unreachable symbols and treat
            infinite recursion as an error.
        optimize: when true, accept cached tables without comparing
            signatures.
        write_tables: when true, write the generated tables with
            ``write_table``. Forced off when ``picklefile`` is given.
        debugfile: file opened for writing when ``debug`` is true and no
            ``debuglog`` is supplied.
        outputdir: output directory passed to ``write_table``.
        debuglog: logger for debugging output.
        errorlog: logger for errors and warnings; defaults to a ``PlyLogger``
            writing to ``sys.stderr``.
        picklefile: file used to read and write pickled tables.

    Returns:
        The constructed ``LRParser``. Its ``parse`` method is also stored in
        the module-level name ``parse``.

    Raises:
        YaccError: if reflection, validation or grammar construction reports
            an error.
    """

    global parse                 # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created

    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        pdict = dict(_items)
    else:
        # The stack depth is relative to this frame, so the call stays inline.
        pdict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict,log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr,pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception:
                e = sys.exc_info()[1]
                errorlog.warning("There was a problem loading the table file: %s", repr(e))
    except VersionError:
        # sys.exc_info() returns (type, value, traceback); only the exception
        # value belongs in the message.
        e = sys.exc_info()[1]
        errorlog.warning("%s", str(e))
    except Exception:
        # Best effort: any other failure to load cached tables falls through
        # to regenerating them below.
        pass

    if debuglog is None:
        if debug:
            debuglog = PlyLogger(open(debugfile,"w"))
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)


    errors = 0

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")

    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term,assoc,level)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.warning("%s",str(e))

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname,syms,funcname,file,line)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.error("%s",str(e))
            errors = 1

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError:
        e = sys.exc_info()[1]
        # Explicit format string, as in the other GrammarError handlers above.
        errorlog.error("%s",str(e))
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
        errors = 1

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("")
        debuglog.info("Unused terminals:")
        debuglog.info("")
        for term in unused_terminals:
            errorlog.warning("Token '%s' defined, but not used", term)
            debuglog.info("    %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("")
        debuglog.info("Grammar")
        debuglog.info("")
        for n,p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    if debug:
        debuglog.info("")
        debuglog.info("Terminals, with rules where they appear")
        debuglog.info("")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info("")
        debuglog.info("Nonterminals, with rules where they appear")
        debuglog.info("")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info("")

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol '%s' is unreachable",u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
            errors = 1

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)

    lr = LRGeneratedTable(grammar,method,debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s",  tok, state, resolution)

        # Each (state, rule, rejected) combination is reported only once.
        already_reported = {}
        for state, rule, rejected in lr.rr_conflicts:
            if (state,id(rule),id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected,state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported[state,id(rule),id(rejected)] = 1

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        lr.write_table(tabmodule,outputdir,signature)

    # Write a pickled version of the tables
    if picklefile:
        lr.pickle_table(picklefile,signature)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr,pinfo.error_func)

    parse = parser.parse
    return parser