tools/yacc.py

   1 # -----------------------------------------------------------------------------
   2 # ply: yacc.py
   3 #
   4 # Copyright (C) 2001-2011,
   5 # David M. Beazley (Dabeaz LLC)
   6 # All rights reserved.
   7 #
   8 # Redistribution and use in source and binary forms, with or without
   9 # modification, are permitted provided that the following conditions are
  10 # met:
  11 #
  12 # * Redistributions of source code must retain the above copyright notice,
  13 #   this list of conditions and the following disclaimer.
  14 # * Redistributions in binary form must reproduce the above copyright notice,
  15 #   this list of conditions and the following disclaimer in the documentation
  16 #   and/or other materials provided with the distribution.
  17 # * Neither the name of the David Beazley or Dabeaz LLC may be used to
  18 #   endorse or promote products derived from this software without
  19 #  specific prior written permission.
  20 #
  21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32 # -----------------------------------------------------------------------------
  33 #
  34 # This implements an LR parser that is constructed from grammar rules defined
  35 # as Python functions. The grammer is specified by supplying the BNF inside
  36 # Python documentation strings.  The inspiration for this technique was borrowed
  37 # from John Aycock's Spark parsing system.  PLY might be viewed as cross between
  38 # Spark and the GNU bison utility.
  39 #
  40 # The current implementation is only somewhat object-oriented. The
  41 # LR parser itself is defined in terms of an object (which allows multiple
  42 # parsers to co-exist).  However, most of the variables used during table
  43 # construction are defined in terms of global variables.  Users shouldn't
  44 # notice unless they are trying to define multiple parsers at the same
  45 # time using threads (in which case they should have their head examined).
  46 #
  47 # This implementation supports both SLR and LALR(1) parsing.  LALR(1)
  48 # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
  49 # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
  50 # Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
  51 # by the more efficient DeRemer and Pennello algorithm.
  52 #
  53 # :::::::: WARNING :::::::
  54 #
  55 # Construction of LR parsing tables is fairly complicated and expensive.
  56 # To make this module run fast, a *LOT* of work has been put into
  57 # optimization---often at the expensive of readability and what might
  58 # consider to be good Python "coding style."   Modify the code at your
  59 # own risk!
  60 # ----------------------------------------------------------------------------
  61
  62 __version__    = "3.5"
  63 __tabversion__ = "3.2"       # Table version
  64
  65 #-----------------------------------------------------------------------------
  66 #                     === User configurable parameters ===
  67 #
  68 # Change these to modify the default behavior of yacc (if you wish)
  69 #-----------------------------------------------------------------------------
  70
  71 yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
  72                                # a 'parser.out' file in the current directory
  73
  74 debug_file  = 'parser.out'     # Default name of the debugging file
  75 tab_module  = 'parsetab'       # Default name of the table module
  76 default_lr  = 'LALR'           # Default LR table generation method
  77
  78 error_count = 3                # Number of symbols that must be shifted to leave recovery mode
  79
  80 yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
  81                                # implementations of certain functions.
  82
  83 resultlimit = 40               # Size limit of results when running in debug mode.
  84
  85 pickle_protocol = 0            # Protocol to use when writing pickle files
  86
  87 import re, types, sys, os.path, inspect
  88
  89 # Compatibility function for python 2.6/3.0
  90 if sys.version_info[0] < 3:
  91     def func_code(f):
  92         return f.func_code
  93 else:
  94     def func_code(f):
  95         return f.__code__
  96
  97 # String type-checking compatibility
  98 if sys.version_info[0] < 3:
  99     string_types = basestring
 100 else:
 101     string_types = str
 102
 103 # Compatibility
 104 try:
 105     MAXINT = sys.maxint
 106 except AttributeError:
 107     MAXINT = sys.maxsize
 108
 109 # Python 2.x/3.0 compatibility.
 110 def load_ply_lex():
 111     if sys.version_info[0] < 3:
 112         import lex
 113     else:
 114         import ply.lex as lex
 115     return lex
 116
 117 # This object is a stand-in for a logging object created by the
 118 # logging module.   PLY will use this by default to create things
 119 # such as the parser.out file.  If a user wants more detailed
 120 # information, they can create their own logging object and pass
 121 # it into PLY.
 122
 123 class PlyLogger(object):
 124     def __init__(self,f):
 125         self.f = f
 126     def debug(self,msg,*args,**kwargs):
 127         self.f.write((msg % args) + "\n")
 128     info     = debug
 129
 130     def warning(self,msg,*args,**kwargs):
 131         self.f.write("WARNING: "+ (msg % args) + "\n")
 132
 133     def error(self,msg,*args,**kwargs):
 134         self.f.write("ERROR: " + (msg % args) + "\n")
 135
 136     critical = debug
 137
 138 # Null logger is used when no output is generated. Does nothing.
 139 class NullLogger(object):
 140     def __getattribute__(self,name):
 141         return self
 142     def __call__(self,*args,**kwargs):
 143         return self
 144
 145 # Exception raised for yacc-related errors
 146 class YaccError(Exception):   pass
 147
 148 # Format the result message that the parser produces when running in debug mode.
 149 def format_result(r):
 150     repr_str = repr(r)
 151     if '\n' in repr_str: repr_str = repr(repr_str)
 152     if len(repr_str) > resultlimit:
 153         repr_str = repr_str[:resultlimit]+" ..."
 154     result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
 155     return result
 156
 157
 158 # Format stack entries when the parser is running in debug mode
 159 def format_stack_entry(r):
 160     repr_str = repr(r)
 161     if '\n' in repr_str: repr_str = repr(repr_str)
 162     if len(repr_str) < 16:
 163         return repr_str
 164     else:
 165         return "<%s @ 0x%x>" % (type(r).__name__,id(r))
 166
 167 # Panic mode error recovery support.   This feature is being reworked--much of the
 168 # code here is to offer a deprecation/backwards compatible transition
 169
 170 _errok = None
 171 _token = None
 172 _restart = None
 173 _warnmsg = """PLY: Don't use global functions errok(), token(), and restart() in p_error().
 174 Instead, invoke the methods on the associated parser instance:
 175
 176     def p_error(p):
 177         ...
 178         # Use parser.errok(), parser.token(), parser.restart()
 179         ...
 180
 181     parser = yacc.yacc()
 182 """
 183 import warnings
 184 def errok():
 185     warnings.warn(_warnmsg)
 186     return _errok()
 187
 188 def restart():
 189     warnings.warn(_warnmsg)
 190     return _restart()
 191
 192 def token():
 193     warnings.warn(_warnmsg)
 194     return _token()
 195
 196 # Utility function to call the p_error() function with some deprecation hacks
 197 def call_errorfunc(errorfunc,token,parser):
 198     global _errok, _token, _restart
 199     _errok = parser.errok
 200     _token = parser.token
 201     _restart = parser.restart
 202     r = errorfunc(token)
 203     del _errok, _token, _restart
 204
 205 #-----------------------------------------------------------------------------
 206 #                        ===  LR Parsing Engine ===
 207 #
 208 # The following classes are used for the LR parser itself.  These are not
 209 # used during table construction and are independent of the actual LR
 210 # table generation algorithm
 211 #-----------------------------------------------------------------------------
 212
 213 # This class is used to hold non-terminal grammar symbols during parsing.
 214 # It normally has the following attributes set:
 215 #        .type       = Grammar symbol type
 216 #        .value      = Symbol value
 217 #        .lineno     = Starting line number
 218 #        .endlineno  = Ending line number (optional, set automatically)
 219 #        .lexpos     = Starting lex position
 220 #        .endlexpos  = Ending lex position (optional, set automatically)
 221
 222 class YaccSymbol:
 223     def __str__(self):    return self.type
 224     def __repr__(self):   return str(self)
 225
 226 # This class is a wrapper around the objects actually passed to each
 227 # grammar rule.   Index lookup and assignment actually assign the
 228 # .value attribute of the underlying YaccSymbol object.
 229 # The lineno() method returns the line number of a given
 230 # item (or 0 if not defined).   The linespan() method returns
 231 # a tuple of (startline,endline) representing the range of lines
 232 # for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
 233 # representing the range of positional information for a symbol.
 234
 235 class YaccProduction:
 236     def __init__(self,s,stack=None):
 237         self.slice = s
 238         self.stack = stack
 239         self.lexer = None
 240         self.parser= None
 241     def __getitem__(self,n):
 242         if isinstance(n, slice):
 243             return [s.value for s in self.slice[n]]
 244         elif n >= 0:
 245             return self.slice[n].value
 246         else:
 247             return self.stack[n].value
 248
 249     def __setitem__(self,n,v):
 250         self.slice[n].value = v
 251
 252     def __getslice__(self,i,j):
 253         return [s.value for s in self.slice[i:j]]
 254
 255     def __len__(self):
 256         return len(self.slice)
 257
 258     def lineno(self,n):
 259         return getattr(self.slice[n],"lineno",0)
 260
 261     def set_lineno(self,n,lineno):
 262         self.slice[n].lineno = lineno
 263
 264     def linespan(self,n):
 265         startline = getattr(self.slice[n],"lineno",0)
 266         endline = getattr(self.slice[n],"endlineno",startline)
 267         return startline,endline
 268
 269     def lexpos(self,n):
 270         return getattr(self.slice[n],"lexpos",0)
 271
 272     def lexspan(self,n):
 273         startpos = getattr(self.slice[n],"lexpos",0)
 274         endpos = getattr(self.slice[n],"endlexpos",startpos)
 275         return startpos,endpos
 276
 277     def error(self):
 278        raise SyntaxError
 279
 280
 281 # -----------------------------------------------------------------------------
 282 #                               == LRParser ==
 283 #
 284 # The LR Parsing engine.
 285 # -----------------------------------------------------------------------------
 286
 287 class LRParser:
 288     def __init__(self,lrtab,errorf):
 289         self.productions = lrtab.lr_productions
 290         self.action      = lrtab.lr_action
 291         self.goto        = lrtab.lr_goto
 292         self.errorfunc   = errorf
 293
 294     def errok(self):
 295         self.errorok     = 1
 296
 297     def restart(self):
 298         del self.statestack[:]
 299         del self.symstack[:]
 300         sym = YaccSymbol()
 301         sym.type = '$end'
 302         self.symstack.append(sym)
 303         self.statestack.append(0)
 304
 305     def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
 306         if debug or yaccdevel:
 307             if isinstance(debug,int):
 308                 debug = PlyLogger(sys.stderr)
 309             return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
 310         elif tracking:
 311             return self.parseopt(input,lexer,debug,tracking,tokenfunc)
 312         else:
 313             return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
 314
 315
 316     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 317     # parsedebug().
 318     #
 319     # This is the debugging enabled version of parse().  All changes made to the
 320     # parsing engine should be made here.   For the non-debugging version,
 321     # copy this code to a method parseopt() and delete all of the sections
 322     # enclosed in:
 323     #
 324     #      #--! DEBUG
 325     #      statements
 326     #      #--! DEBUG
 327     #
 328     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 329
 330     def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
 331         lookahead = None                 # Current lookahead symbol
 332         lookaheadstack = [ ]             # Stack of lookahead symbols
 333         actions = self.action            # Local reference to action table (to avoid lookup on self.)
 334         goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
 335         prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
 336         pslice  = YaccProduction(None)   # Production object passed to grammar rules
 337         errorcount = 0                   # Used during error recovery
 338
 339         # --! DEBUG
 340         debug.info("PLY: PARSE DEBUG START")
 341         # --! DEBUG
 342
 343         # If no lexer was given, we will try to use the lex module
 344         if not lexer:
 345             lex = load_ply_lex()
 346             lexer = lex.lexer
 347
 348         # Set up the lexer and parser objects on pslice
 349         pslice.lexer = lexer
 350         pslice.parser = self
 351
 352         # If input was supplied, pass to lexer
 353         if input is not None:
 354             lexer.input(input)
 355
 356         if tokenfunc is None:
 357            # Tokenize function
 358            get_token = lexer.token
 359         else:
 360            get_token = tokenfunc
 361
 362         # Set the parser() token method (sometimes used in error recovery)
 363         self.token = get_token
 364
 365         # Set up the state and symbol stacks
 366
 367         statestack = [ ]                # Stack of parsing states
 368         self.statestack = statestack
 369         symstack   = [ ]                # Stack of grammar symbols
 370         self.symstack = symstack
 371
 372         pslice.stack = symstack         # Put in the production
 373         errtoken   = None               # Err token
 374
 375         # The start state is assumed to be (0,$end)
 376
 377         statestack.append(0)
 378         sym = YaccSymbol()
 379         sym.type = "$end"
 380         symstack.append(sym)
 381         state = 0
 382         while 1:
 383             # Get the next symbol on the input.  If a lookahead symbol
 384             # is already set, we just use that. Otherwise, we'll pull
 385             # the next token off of the lookaheadstack or from the lexer
 386
 387             # --! DEBUG
 388             debug.debug('')
 389             debug.debug('State  : %s', state)
 390             # --! DEBUG
 391
 392             if not lookahead:
 393                 if not lookaheadstack:
 394                     lookahead = get_token()     # Get the next token
 395                 else:
 396                     lookahead = lookaheadstack.pop()
 397                 if not lookahead:
 398                     lookahead = YaccSymbol()
 399                     lookahead.type = "$end"
 400
 401             # --! DEBUG
 402             debug.debug('Stack  : %s',
 403                         ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
 404             # --! DEBUG
 405
 406             # Check the action table
 407             ltype = lookahead.type
 408             t = actions[state].get(ltype)
 409
 410             if t is not None:
 411                 if t > 0:
 412                     # shift a symbol on the stack
 413                     statestack.append(t)
 414                     state = t
 415
 416                     # --! DEBUG
 417                     debug.debug("Action : Shift and goto state %s", t)
 418                     # --! DEBUG
 419
 420                     symstack.append(lookahead)
 421                     lookahead = None
 422
 423                     # Decrease error count on successful shift
 424                     if errorcount: errorcount -=1
 425                     continue
 426
 427                 if t < 0:
 428                     # reduce a symbol on the stack, emit a production
 429                     p = prod[-t]
 430                     pname = p.name
 431                     plen  = p.len
 432
 433                     # Get production function
 434                     sym = YaccSymbol()
 435                     sym.type = pname       # Production name
 436                     sym.value = None
 437
 438                     # --! DEBUG
 439                     if plen:
 440                         debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
 441                     else:
 442                         debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
 443
 444                     # --! DEBUG
 445
 446                     if plen:
 447                         targ = symstack[-plen-1:]
 448                         targ[0] = sym
 449
 450                         # --! TRACKING
 451                         if tracking:
 452                            t1 = targ[1]
 453                            sym.lineno = t1.lineno
 454                            sym.lexpos = t1.lexpos
 455                            t1 = targ[-1]
 456                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
 457                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
 458
 459                         # --! TRACKING
 460
 461                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 462                         # The code enclosed in this section is duplicated
 463                         # below as a performance optimization.  Make sure
 464                         # changes get made in both locations.
 465
 466                         pslice.slice = targ
 467
 468                         try:
 469                             # Call the grammar rule with our special slice object
 470                             del symstack[-plen:]
 471                             del statestack[-plen:]
 472                             p.callable(pslice)
 473                             # --! DEBUG
 474                             debug.info("Result : %s", format_result(pslice[0]))
 475                             # --! DEBUG
 476                             symstack.append(sym)
 477                             state = goto[statestack[-1]][pname]
 478                             statestack.append(state)
 479                         except SyntaxError:
 480                             # If an error was set. Enter error recovery state
 481                             lookaheadstack.append(lookahead)
 482                             symstack.pop()
 483                             statestack.pop()
 484                             state = statestack[-1]
 485                             sym.type = 'error'
 486                             lookahead = sym
 487                             errorcount = error_count
 488                             self.errorok = 0
 489                         continue
 490                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 491
 492                     else:
 493
 494                         # --! TRACKING
 495                         if tracking:
 496                            sym.lineno = lexer.lineno
 497                            sym.lexpos = lexer.lexpos
 498                         # --! TRACKING
 499
 500                         targ = [ sym ]
 501
 502                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 503                         # The code enclosed in this section is duplicated
 504                         # above as a performance optimization.  Make sure
 505                         # changes get made in both locations.
 506
 507                         pslice.slice = targ
 508
 509                         try:
 510                             # Call the grammar rule with our special slice object
 511                             p.callable(pslice)
 512                             # --! DEBUG
 513                             debug.info("Result : %s", format_result(pslice[0]))
 514                             # --! DEBUG
 515                             symstack.append(sym)
 516                             state = goto[statestack[-1]][pname]
 517                             statestack.append(state)
 518                         except SyntaxError:
 519                             # If an error was set. Enter error recovery state
 520                             lookaheadstack.append(lookahead)
 521                             symstack.pop()
 522                             statestack.pop()
 523                             state = statestack[-1]
 524                             sym.type = 'error'
 525                             lookahead = sym
 526                             errorcount = error_count
 527                             self.errorok = 0
 528                         continue
 529                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 530
 531                 if t == 0:
 532                     n = symstack[-1]
 533                     result = getattr(n,"value",None)
 534                     # --! DEBUG
 535                     debug.info("Done   : Returning %s", format_result(result))
 536                     debug.info("PLY: PARSE DEBUG END")
 537                     # --! DEBUG
 538                     return result
 539
 540             if t == None:
 541
 542                 # --! DEBUG
 543                 debug.error('Error  : %s',
 544                             ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
 545                 # --! DEBUG
 546
 547                 # We have some kind of parsing error here.  To handle
 548                 # this, we are going to push the current token onto
 549                 # the tokenstack and replace it with an 'error' token.
 550                 # If there are any synchronization rules, they may
 551                 # catch it.
 552                 #
 553                 # In addition to pushing the error token, we call call
 554                 # the user defined p_error() function if this is the
 555                 # first syntax error.  This function is only called if
 556                 # errorcount == 0.
 557                 if errorcount == 0 or self.errorok:
 558                     errorcount = error_count
 559                     self.errorok = 0
 560                     errtoken = lookahead
 561                     if errtoken.type == "$end":
 562                         errtoken = None               # End of file!
 563                     if self.errorfunc:
 564                         if errtoken and not hasattr(errtoken,'lexer'):
 565                             errtoken.lexer = lexer
 566                         tok = call_errorfunc(self.errorfunc, errtoken, self)
 567                         if self.errorok:
 568                             # User must have done some kind of panic
 569                             # mode recovery on their own.  The
 570                             # returned token is the next lookahead
 571                             lookahead = tok
 572                             errtoken = None
 573                             continue
 574                     else:
 575                         if errtoken:
 576                             if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
 577                             else: lineno = 0
 578                             if lineno:
 579                                 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
 580                             else:
 581                                 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
 582                         else:
 583                             sys.stderr.write("yacc: Parse error in input. EOF\n")
 584                             return
 585
 586                 else:
 587                     errorcount = error_count
 588
 589                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
 590                 # entire parse has been rolled back and we're completely hosed.   The token is
 591                 # discarded and we just keep going.
 592
 593                 if len(statestack) <= 1 and lookahead.type != "$end":
 594                     lookahead = None
 595                     errtoken = None
 596                     state = 0
 597                     # Nuke the pushback stack
 598                     del lookaheadstack[:]
 599                     continue
 600
 601                 # case 2: the statestack has a couple of entries on it, but we're
 602                 # at the end of the file. nuke the top entry and generate an error token
 603
 604                 # Start nuking entries on the stack
 605                 if lookahead.type == "$end":
 606                     # Whoa. We're really hosed here. Bail out
 607                     return
 608
 609                 if lookahead.type != 'error':
 610                     sym = symstack[-1]
 611                     if sym.type == 'error':
 612                         # Hmmm. Error is on top of stack, we'll just nuke input
 613                         # symbol and continue
 614                         if tracking:
 615                             sym.endlineno = getattr(lookahead,"lineno", sym.lineno)
 616                             sym.endlexpos = getattr(lookahead,"lexpos", sym.lexpos)
 617                         lookahead = None
 618                         continue
 619                     t = YaccSymbol()
 620                     t.type = 'error'
 621                     if hasattr(lookahead,"lineno"):
 622                         t.lineno = lookahead.lineno
 623                     if hasattr(lookahead,"lexpos"):
 624                         t.lexpos = lookahead.lexpos
 625                     t.value = lookahead
 626                     lookaheadstack.append(lookahead)
 627                     lookahead = t
 628                 else:
 629                     sym = symstack.pop()
 630                     if tracking:
 631                         lookahead.lineno = sym.lineno
 632                         lookahead.lexpos = sym.lexpos
 633                     statestack.pop()
 634                     state = statestack[-1]       # Potential bug fix
 635
 636                 continue
 637
 638             # Call an error function here
 639             raise RuntimeError("yacc: internal parser error!!!\n")
 640
 641     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 642     # parseopt().
 643     #
 644     # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
 645     # Edit the debug version above, then copy any modifications to the method
 646     # below while removing #--! DEBUG sections.
 647     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 648
 649
 650     def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
 651         lookahead = None                 # Current lookahead symbol
 652         lookaheadstack = [ ]             # Stack of lookahead symbols
 653         actions = self.action            # Local reference to action table (to avoid lookup on self.)
 654         goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
 655         prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
 656         pslice  = YaccProduction(None)   # Production object passed to grammar rules
 657         errorcount = 0                   # Used during error recovery
 658
 659         # If no lexer was given, we will try to use the lex module
 660         if not lexer:
 661             lex = load_ply_lex()
 662             lexer = lex.lexer
 663
 664         # Set up the lexer and parser objects on pslice
 665         pslice.lexer = lexer
 666         pslice.parser = self
 667
 668         # If input was supplied, pass to lexer
 669         if input is not None:
 670             lexer.input(input)
 671
 672         if tokenfunc is None:
 673            # Tokenize function
 674            get_token = lexer.token
 675         else:
 676            get_token = tokenfunc
 677
 678         # Set the parser() token method (sometimes used in error recovery)
 679         self.token = get_token
 680
 681         # Set up the state and symbol stacks
 682
 683         statestack = [ ]                # Stack of parsing states
 684         self.statestack = statestack
 685         symstack   = [ ]                # Stack of grammar symbols
 686         self.symstack = symstack
 687
 688         pslice.stack = symstack         # Put in the production
 689         errtoken   = None               # Err token
 690
 691         # The start state is assumed to be (0,$end)
 692
 693         statestack.append(0)
 694         sym = YaccSymbol()
 695         sym.type = '$end'
 696         symstack.append(sym)
 697         state = 0
 698         while 1:
 699             # Get the next symbol on the input.  If a lookahead symbol
 700             # is already set, we just use that. Otherwise, we'll pull
 701             # the next token off of the lookaheadstack or from the lexer
 702
 703             if not lookahead:
 704                 if not lookaheadstack:
 705                     lookahead = get_token()     # Get the next token
 706                 else:
 707                     lookahead = lookaheadstack.pop()
 708                 if not lookahead:
 709                     lookahead = YaccSymbol()
 710                     lookahead.type = '$end'
 711
 712             # Check the action table
 713             ltype = lookahead.type
 714             t = actions[state].get(ltype)
 715
 716             if t is not None:
 717                 if t > 0:
 718                     # shift a symbol on the stack
 719                     statestack.append(t)
 720                     state = t
 721
 722                     symstack.append(lookahead)
 723                     lookahead = None
 724
 725                     # Decrease error count on successful shift
 726                     if errorcount: errorcount -=1
 727                     continue
 728
 729                 if t < 0:
 730                     # reduce a symbol on the stack, emit a production
 731                     p = prod[-t]
 732                     pname = p.name
 733                     plen  = p.len
 734
 735                     # Get production function
 736                     sym = YaccSymbol()
 737                     sym.type = pname       # Production name
 738                     sym.value = None
 739
 740                     if plen:
 741                         targ = symstack[-plen-1:]
 742                         targ[0] = sym
 743
 744                         # --! TRACKING
 745                         if tracking:
 746                            t1 = targ[1]
 747                            sym.lineno = t1.lineno
 748                            sym.lexpos = t1.lexpos
 749                            t1 = targ[-1]
 750                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
 751                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
 752
 753                         # --! TRACKING
 754
 755                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 756                         # The code enclosed in this section is duplicated
 757                         # below as a performance optimization.  Make sure
 758                         # changes get made in both locations.
 759
 760                         pslice.slice = targ
 761
 762                         try:
 763                             # Call the grammar rule with our special slice object
 764                             del symstack[-plen:]
 765                             del statestack[-plen:]
 766                             p.callable(pslice)
 767                             symstack.append(sym)
 768                             state = goto[statestack[-1]][pname]
 769                             statestack.append(state)
 770                         except SyntaxError:
 771                             # If an error was set. Enter error recovery state
 772                             lookaheadstack.append(lookahead)
 773                             symstack.pop()
 774                             statestack.pop()
 775                             state = statestack[-1]
 776                             sym.type = 'error'
 777                             lookahead = sym
 778                             errorcount = error_count
 779                             self.errorok = 0
 780                         continue
 781                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 782
 783                     else:
 784
 785                         # --! TRACKING
 786                         if tracking:
 787                            sym.lineno = lexer.lineno
 788                            sym.lexpos = lexer.lexpos
 789                         # --! TRACKING
 790
 791                         targ = [ sym ]
 792
 793                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 794                         # The code enclosed in this section is duplicated
 795                         # above as a performance optimization.  Make sure
 796                         # changes get made in both locations.
 797
 798                         pslice.slice = targ
 799
 800                         try:
 801                             # Call the grammar rule with our special slice object
 802                             p.callable(pslice)
 803                             symstack.append(sym)
 804                             state = goto[statestack[-1]][pname]
 805                             statestack.append(state)
 806                         except SyntaxError:
 807                             # If an error was set. Enter error recovery state
 808                             lookaheadstack.append(lookahead)
 809                             symstack.pop()
 810                             statestack.pop()
 811                             state = statestack[-1]
 812                             sym.type = 'error'
 813                             lookahead = sym
 814                             errorcount = error_count
 815                             self.errorok = 0
 816                         continue
 817                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 818
 819                 if t == 0:
 820                     n = symstack[-1]
 821                     return getattr(n,"value",None)
 822
 823             if t == None:
 824
 825                 # We have some kind of parsing error here.  To handle
 826                 # this, we are going to push the current token onto
 827                 # the tokenstack and replace it with an 'error' token.
 828                 # If there are any synchronization rules, they may
 829                 # catch it.
 830                 #
 831                 # In addition to pushing the error token, we call call
 832                 # the user defined p_error() function if this is the
 833                 # first syntax error.  This function is only called if
 834                 # errorcount == 0.
 835                 if errorcount == 0 or self.errorok:
 836                     errorcount = error_count
 837                     self.errorok = 0
 838                     errtoken = lookahead
 839                     if errtoken.type == '$end':
 840                         errtoken = None               # End of file!
 841                     if self.errorfunc:
 842                         if errtoken and not hasattr(errtoken,'lexer'):
 843                             errtoken.lexer = lexer
 844                         tok = call_errorfunc(self.errorfunc, errtoken, self)
 845
 846                         if self.errorok:
 847                             # User must have done some kind of panic
 848                             # mode recovery on their own.  The
 849                             # returned token is the next lookahead
 850                             lookahead = tok
 851                             errtoken = None
 852                             continue
 853                     else:
 854                         if errtoken:
 855                             if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
 856                             else: lineno = 0
 857                             if lineno:
 858                                 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
 859                             else:
 860                                 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
 861                         else:
 862                             sys.stderr.write("yacc: Parse error in input. EOF\n")
 863                             return
 864
 865                 else:
 866                     errorcount = error_count
 867
 868                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
 869                 # entire parse has been rolled back and we're completely hosed.   The token is
 870                 # discarded and we just keep going.
 871
 872                 if len(statestack) <= 1 and lookahead.type != '$end':
 873                     lookahead = None
 874                     errtoken = None
 875                     state = 0
 876                     # Nuke the pushback stack
 877                     del lookaheadstack[:]
 878                     continue
 879
 880                 # case 2: the statestack has a couple of entries on it, but we're
 881                 # at the end of the file. nuke the top entry and generate an error token
 882
 883                 # Start nuking entries on the stack
 884                 if lookahead.type == '$end':
 885                     # Whoa. We're really hosed here. Bail out
 886                     return
 887
 888                 if lookahead.type != 'error':
 889                     sym = symstack[-1]
 890                     if sym.type == 'error':
 891                         # Hmmm. Error is on top of stack, we'll just nuke input
 892                         # symbol and continue
 893                         if tracking:
 894                             sym.endlineno = getattr(lookahead,"lineno", sym.lineno)
 895                             sym.endlexpos = getattr(lookahead,"lexpos", sym.lexpos)
 896                         lookahead = None
 897                         continue
 898                     t = YaccSymbol()
 899                     t.type = 'error'
 900                     if hasattr(lookahead,"lineno"):
 901                         t.lineno = lookahead.lineno
 902                     if hasattr(lookahead,"lexpos"):
 903                         t.lexpos = lookahead.lexpos
 904                     t.value = lookahead
 905                     lookaheadstack.append(lookahead)
 906                     lookahead = t
 907                 else:
 908                     sym = symstack.pop()
 909                     if tracking:
 910                         lookahead.lineno = sym.lineno
 911                         lookahead.lexpos = sym.lexpos
 912                     statestack.pop()
 913                     state = statestack[-1]       # Potential bug fix
 914
 915                 continue
 916
 917             # Call an error function here
 918             raise RuntimeError("yacc: internal parser error!!!\n")
 919
 920     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 921     # parseopt_notrack().
 922     #
 923     # Optimized version of parseopt() with line number tracking removed.
 924     # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
 925     # code in the #--! TRACKING sections
 926     # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 927
 928     def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
 929         lookahead = None                 # Current lookahead symbol
 930         lookaheadstack = [ ]             # Stack of lookahead symbols
 931         actions = self.action            # Local reference to action table (to avoid lookup on self.)
 932         goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
 933         prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
 934         pslice  = YaccProduction(None)   # Production object passed to grammar rules
 935         errorcount = 0                   # Used during error recovery
 936
 937         # If no lexer was given, we will try to use the lex module
 938         if not lexer:
 939             lex = load_ply_lex()
 940             lexer = lex.lexer
 941
 942         # Set up the lexer and parser objects on pslice
 943         pslice.lexer = lexer
 944         pslice.parser = self
 945
 946         # If input was supplied, pass to lexer
 947         if input is not None:
 948             lexer.input(input)
 949
 950         if tokenfunc is None:
 951            # Tokenize function
 952            get_token = lexer.token
 953         else:
 954            get_token = tokenfunc
 955
 956         # Set the parser() token method (sometimes used in error recovery)
 957         self.token = get_token
 958
 959         # Set up the state and symbol stacks
 960
 961         statestack = [ ]                # Stack of parsing states
 962         self.statestack = statestack
 963         symstack   = [ ]                # Stack of grammar symbols
 964         self.symstack = symstack
 965
 966         pslice.stack = symstack         # Put in the production
 967         errtoken   = None               # Err token
 968
 969         # The start state is assumed to be (0,$end)
 970
 971         statestack.append(0)
 972         sym = YaccSymbol()
 973         sym.type = '$end'
 974         symstack.append(sym)
 975         state = 0
 976         while 1:
 977             # Get the next symbol on the input.  If a lookahead symbol
 978             # is already set, we just use that. Otherwise, we'll pull
 979             # the next token off of the lookaheadstack or from the lexer
 980
 981             if not lookahead:
 982                 if not lookaheadstack:
 983                     lookahead = get_token()     # Get the next token
 984                 else:
 985                     lookahead = lookaheadstack.pop()
 986                 if not lookahead:
 987                     lookahead = YaccSymbol()
 988                     lookahead.type = '$end'
 989
 990             # Check the action table
 991             ltype = lookahead.type
 992             t = actions[state].get(ltype)
 993
 994             if t is not None:
 995                 if t > 0:
 996                     # shift a symbol on the stack
 997                     statestack.append(t)
 998                     state = t
 999
1000                     symstack.append(lookahead)
1001                     lookahead = None
1002
1003                     # Decrease error count on successful shift
1004                     if errorcount: errorcount -=1
1005                     continue
1006
1007                 if t < 0:
1008                     # reduce a symbol on the stack, emit a production
1009                     p = prod[-t]
1010                     pname = p.name
1011                     plen  = p.len
1012
1013                     # Get production function
1014                     sym = YaccSymbol()
1015                     sym.type = pname       # Production name
1016                     sym.value = None
1017
1018                     if plen:
1019                         targ = symstack[-plen-1:]
1020                         targ[0] = sym
1021
1022                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1023                         # The code enclosed in this section is duplicated
1024                         # below as a performance optimization.  Make sure
1025                         # changes get made in both locations.
1026
1027                         pslice.slice = targ
1028
1029                         try:
1030                             # Call the grammar rule with our special slice object
1031                             del symstack[-plen:]
1032                             del statestack[-plen:]
1033                             p.callable(pslice)
1034                             symstack.append(sym)
1035                             state = goto[statestack[-1]][pname]
1036                             statestack.append(state)
1037                         except SyntaxError:
1038                             # If an error was set. Enter error recovery state
1039                             lookaheadstack.append(lookahead)
1040                             symstack.pop()
1041                             statestack.pop()
1042                             state = statestack[-1]
1043                             sym.type = 'error'
1044                             lookahead = sym
1045                             errorcount = error_count
1046                             self.errorok = 0
1047                         continue
1048                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1049
1050                     else:
1051
1052                         targ = [ sym ]
1053
1054                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1055                         # The code enclosed in this section is duplicated
1056                         # above as a performance optimization.  Make sure
1057                         # changes get made in both locations.
1058
1059                         pslice.slice = targ
1060
1061                         try:
1062                             # Call the grammar rule with our special slice object
1063                             p.callable(pslice)
1064                             symstack.append(sym)
1065                             state = goto[statestack[-1]][pname]
1066                             statestack.append(state)
1067                         except SyntaxError:
1068                             # If an error was set. Enter error recovery state
1069                             lookaheadstack.append(lookahead)
1070                             symstack.pop()
1071                             statestack.pop()
1072                             state = statestack[-1]
1073                             sym.type = 'error'
1074                             lookahead = sym
1075                             errorcount = error_count
1076                             self.errorok = 0
1077                         continue
1078                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1079
1080                 if t == 0:
1081                     n = symstack[-1]
1082                     return getattr(n,"value",None)
1083
1084             if t == None:
1085
1086                 # We have some kind of parsing error here.  To handle
1087                 # this, we are going to push the current token onto
1088                 # the tokenstack and replace it with an 'error' token.
1089                 # If there are any synchronization rules, they may
1090                 # catch it.
1091                 #
1092                 # In addition to pushing the error token, we call call
1093                 # the user defined p_error() function if this is the
1094                 # first syntax error.  This function is only called if
1095                 # errorcount == 0.
1096                 if errorcount == 0 or self.errorok:
1097                     errorcount = error_count
1098                     self.errorok = 0
1099                     errtoken = lookahead
1100                     if errtoken.type == '$end':
1101                         errtoken = None               # End of file!
1102                     if self.errorfunc:
1103                         if errtoken and not hasattr(errtoken,'lexer'):
1104                             errtoken.lexer = lexer
1105                         tok = call_errorfunc(self.errorfunc, errtoken, self)
1106
1107                         if self.errorok:
1108                             # User must have done some kind of panic
1109                             # mode recovery on their own.  The
1110                             # returned token is the next lookahead
1111                             lookahead = tok
1112                             errtoken = None
1113                             continue
1114                     else:
1115                         if errtoken:
1116                             if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
1117                             else: lineno = 0
1118                             if lineno:
1119                                 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
1120                             else:
1121                                 sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
1122                         else:
1123                             sys.stderr.write("yacc: Parse error in input. EOF\n")
1124                             return
1125
1126                 else:
1127                     errorcount = error_count
1128
1129                 # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
1130                 # entire parse has been rolled back and we're completely hosed.   The token is
1131                 # discarded and we just keep going.
1132
1133                 if len(statestack) <= 1 and lookahead.type != '$end':
1134                     lookahead = None
1135                     errtoken = None
1136                     state = 0
1137                     # Nuke the pushback stack
1138                     del lookaheadstack[:]
1139                     continue
1140
1141                 # case 2: the statestack has a couple of entries on it, but we're
1142                 # at the end of the file. nuke the top entry and generate an error token
1143
1144                 # Start nuking entries on the stack
1145                 if lookahead.type == '$end':
1146                     # Whoa. We're really hosed here. Bail out
1147                     return
1148
1149                 if lookahead.type != 'error':
1150                     sym = symstack[-1]
1151                     if sym.type == 'error':
1152                         # Hmmm. Error is on top of stack, we'll just nuke input
1153                         # symbol and continue
1154                         lookahead = None
1155                         continue
1156                     t = YaccSymbol()
1157                     t.type = 'error'
1158                     if hasattr(lookahead,"lineno"):
1159                         t.lineno = lookahead.lineno
1160                     if hasattr(lookahead,"lexpos"):
1161                         t.lexpos = lookahead.lexpos
1162                     t.value = lookahead
1163                     lookaheadstack.append(lookahead)
1164                     lookahead = t
1165                 else:
1166                     symstack.pop()
1167                     statestack.pop()
1168                     state = statestack[-1]       # Potential bug fix
1169
1170                 continue
1171
1172             # Call an error function here
1173             raise RuntimeError("yacc: internal parser error!!!\n")
1174
1175 # -----------------------------------------------------------------------------
1176 #                          === Grammar Representation ===
1177 #
1178 # The following functions, classes, and variables are used to represent and
1179 # manipulate the rules that make up a grammar.
1180 # -----------------------------------------------------------------------------
1181
1182 import re
1183
1184 # regex matching identifiers
1185 _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1186
1187 # -----------------------------------------------------------------------------
1188 # class Production:
1189 #
1190 # This class stores the raw information about a single production or grammar rule.
1191 # A grammar rule refers to a specification such as this:
1192 #
1193 #       expr : expr PLUS term
1194 #
1195 # Here are the basic attributes defined on all productions
1196 #
1197 #       name     - Name of the production.  For example 'expr'
1198 #       prod     - A list of symbols on the right side ['expr','PLUS','term']
1199 #       prec     - Production precedence level
1200 #       number   - Production number.
1201 #       func     - Function that executes on reduce
1202 #       file     - File where production function is defined
1203 #       lineno   - Line number where production function is defined
1204 #
1205 # The following attributes are defined or optional.
1206 #
1207 #       len       - Length of the production (number of symbols on right hand side)
1208 #       usyms     - Set of unique symbols found in the production
1209 # -----------------------------------------------------------------------------
1210
1211 class Production(object):
1212     reduced = 0
1213     def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
1214         self.name     = name
1215         self.prod     = tuple(prod)
1216         self.number   = number
1217         self.func     = func
1218         self.callable = None
1219         self.file     = file
1220         self.line     = line
1221         self.prec     = precedence
1222
1223         # Internal settings used during table construction
1224
1225         self.len  = len(self.prod)   # Length of the production
1226
1227         # Create a list of unique production symbols used in the production
1228         self.usyms = [ ]
1229         for s in self.prod:
1230             if s not in self.usyms:
1231                 self.usyms.append(s)
1232
1233         # List of all LR items for the production
1234         self.lr_items = []
1235         self.lr_next = None
1236
1237         # Create a string representation
1238         if self.prod:
1239             self.str = "%s -> %s" % (self.name," ".join(self.prod))
1240         else:
1241             self.str = "%s -> <empty>" % self.name
1242
1243     def __str__(self):
1244         return self.str
1245
1246     def __repr__(self):
1247         return "Production("+str(self)+")"
1248
1249     def __len__(self):
1250         return len(self.prod)
1251
1252     def __nonzero__(self):
1253         return 1
1254
1255     def __getitem__(self,index):
1256         return self.prod[index]
1257
1258     # Return the nth lr_item from the production (or None if at the end)
1259     def lr_item(self,n):
1260         if n > len(self.prod): return None
1261         p = LRItem(self,n)
1262
1263         # Precompute the list of productions immediately following.  Hack. Remove later
1264         try:
1265             p.lr_after = Prodnames[p.prod[n+1]]
1266         except (IndexError,KeyError):
1267             p.lr_after = []
1268         try:
1269             p.lr_before = p.prod[n-1]
1270         except IndexError:
1271             p.lr_before = None
1272
1273         return p
1274
1275     # Bind the production function name to a callable
1276     def bind(self,pdict):
1277         if self.func:
1278             self.callable = pdict[self.func]
1279
1280 # This class serves as a minimal standin for Production objects when
1281 # reading table data from files.   It only contains information
1282 # actually used by the LR parsing engine, plus some additional
1283 # debugging information.
1284 class MiniProduction(object):
1285     def __init__(self,str,name,len,func,file,line):
1286         self.name     = name
1287         self.len      = len
1288         self.func     = func
1289         self.callable = None
1290         self.file     = file
1291         self.line     = line
1292         self.str      = str
1293     def __str__(self):
1294         return self.str
1295     def __repr__(self):
1296         return "MiniProduction(%s)" % self.str
1297
1298     # Bind the production function name to a callable
1299     def bind(self,pdict):
1300         if self.func:
1301             self.callable = pdict[self.func]
1302
1303
1304 # -----------------------------------------------------------------------------
1305 # class LRItem
1306 #
1307 # This class represents a specific stage of parsing a production rule.  For
1308 # example:
1309 #
1310 #       expr : expr . PLUS term
1311 #
1312 # In the above, the "." represents the current location of the parse.  Here
1313 # basic attributes:
1314 #
1315 #       name       - Name of the production.  For example 'expr'
1316 #       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
1317 #       number     - Production number.
1318 #
1319 #       lr_next      Next LR item. Example, if we are ' expr -> expr . PLUS term'
1320 #                    then lr_next refers to 'expr -> expr PLUS . term'
1321 #       lr_index   - LR item index (location of the ".") in the prod list.
1322 #       lookaheads - LALR lookahead symbols for this item
1323 #       len        - Length of the production (number of symbols on right hand side)
1324 #       lr_after    - List of all productions that immediately follow
1325 #       lr_before   - Grammar symbol immediately before
1326 # -----------------------------------------------------------------------------
1327
1328 class LRItem(object):
1329     def __init__(self,p,n):
1330         self.name       = p.name
1331         self.prod       = list(p.prod)
1332         self.number     = p.number
1333         self.lr_index   = n
1334         self.lookaheads = { }
1335         self.prod.insert(n,".")
1336         self.prod       = tuple(self.prod)
1337         self.len        = len(self.prod)
1338         self.usyms      = p.usyms
1339
1340     def __str__(self):
1341         if self.prod:
1342             s = "%s -> %s" % (self.name," ".join(self.prod))
1343         else:
1344             s = "%s -> <empty>" % self.name
1345         return s
1346
1347     def __repr__(self):
1348         return "LRItem("+str(self)+")"
1349
1350 # -----------------------------------------------------------------------------
1351 # rightmost_terminal()
1352 #
1353 # Return the rightmost terminal from a list of symbols.  Used in add_production()
1354 # -----------------------------------------------------------------------------
1355 def rightmost_terminal(symbols, terminals):
1356     i = len(symbols) - 1
1357     while i >= 0:
1358         if symbols[i] in terminals:
1359             return symbols[i]
1360         i -= 1
1361     return None
1362
1363 # -----------------------------------------------------------------------------
1364 #                           === GRAMMAR CLASS ===
1365 #
1366 # The following class represents the contents of the specified grammar along
1367 # with various computed properties such as first sets, follow sets, LR items, etc.
1368 # This data is used for critical parts of the table generation process later.
1369 # -----------------------------------------------------------------------------
1370
1371 class GrammarError(YaccError): pass
1372
1373 class Grammar(object):
1374     def __init__(self,terminals):
1375         self.Productions  = [None]  # A list of all of the productions.  The first
1376                                     # entry is always reserved for the purpose of
1377                                     # building an augmented grammar
1378
1379         self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
1380                                     # productions of that nonterminal.
1381
1382         self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
1383                                     # productions.
1384
1385         self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
1386                                     # list of the rules where they are used.
1387
1388         for term in terminals:
1389             self.Terminals[term] = []
1390
1391         self.Terminals['error'] = []
1392
1393         self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
1394                                     # of rule numbers where they are used.
1395
1396         self.First        = { }     # A dictionary of precomputed FIRST(x) symbols
1397
1398         self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols
1399
1400         self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
1401                                     # form ('right',level) or ('nonassoc', level) or ('left',level)
1402
1403         self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammer.
1404                                     # This is only used to provide error checking and to generate
1405                                     # a warning about unused precedence rules.
1406
1407         self.Start = None           # Starting symbol for the grammar
1408
1409
1410     def __len__(self):
1411         return len(self.Productions)
1412
1413     def __getitem__(self,index):
1414         return self.Productions[index]
1415
1416     # -----------------------------------------------------------------------------
1417     # set_precedence()
1418     #
1419     # Sets the precedence for a given terminal. assoc is the associativity such as
1420     # 'left','right', or 'nonassoc'.  level is a numeric level.
1421     #
1422     # -----------------------------------------------------------------------------
1423
1424     def set_precedence(self,term,assoc,level):
1425         assert self.Productions == [None],"Must call set_precedence() before add_production()"
1426         if term in self.Precedence:
1427             raise GrammarError("Precedence already specified for terminal %r" % term)
1428         if assoc not in ['left','right','nonassoc']:
1429             raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1430         self.Precedence[term] = (assoc,level)
1431
1432     # -----------------------------------------------------------------------------
1433     # add_production()
1434     #
1435     # Given an action function, this function assembles a production rule and
1436     # computes its precedence level.
1437     #
1438     # The production rule is supplied as a list of symbols.   For example,
1439     # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1440     # symbols ['expr','PLUS','term'].
1441     #
1442     # Precedence is determined by the precedence of the right-most non-terminal
1443     # or the precedence of a terminal specified by %prec.
1444     #
1445     # A variety of error checks are performed to make sure production symbols
1446     # are valid and that %prec is used correctly.
1447     # -----------------------------------------------------------------------------
1448
1449     def add_production(self,prodname,syms,func=None,file='',line=0):
1450
1451         if prodname in self.Terminals:
1452             raise GrammarError("%s:%d: Illegal rule name %r. Already defined as a token" % (file,line,prodname))
1453         if prodname == 'error':
1454             raise GrammarError("%s:%d: Illegal rule name %r. error is a reserved word" % (file,line,prodname))
1455         if not _is_identifier.match(prodname):
1456             raise GrammarError("%s:%d: Illegal rule name %r" % (file,line,prodname))
1457
1458         # Look for literal tokens
1459         for n,s in enumerate(syms):
1460             if s[0] in "'\"":
1461                  try:
1462                      c = eval(s)
1463                      if (len(c) > 1):
1464                           raise GrammarError("%s:%d: Literal token %s in rule %r may only be a single character" % (file,line,s, prodname))
1465                      if not c in self.Terminals:
1466                           self.Terminals[c] = []
1467                      syms[n] = c
1468                      continue
1469                  except SyntaxError:
1470                      pass
1471             if not _is_identifier.match(s) and s != '%prec':
1472                 raise GrammarError("%s:%d: Illegal name %r in rule %r" % (file,line,s, prodname))
1473
1474         # Determine the precedence level
1475         if '%prec' in syms:
1476             if syms[-1] == '%prec':
1477                 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
1478             if syms[-2] != '%prec':
1479                 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
1480             precname = syms[-1]
1481             prodprec = self.Precedence.get(precname)
1482             if not prodprec:
1483                 raise GrammarError("%s:%d: Nothing known about the precedence of %r" % (file,line,precname))
1484             else:
1485                 self.UsedPrecedence[precname] = 1
1486             del syms[-2:]     # Drop %prec from the rule
1487         else:
1488             # If no %prec, precedence is determined by the rightmost terminal symbol
1489             precname = rightmost_terminal(syms,self.Terminals)
1490             prodprec = self.Precedence.get(precname,('right',0))
1491
1492         # See if the rule is already in the rulemap
1493         map = "%s -> %s" % (prodname,syms)
1494         if map in self.Prodmap:
1495             m = self.Prodmap[map]
1496             raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
1497                                "Previous definition at %s:%d" % (m.file, m.line))
1498
1499         # From this point on, everything is valid.  Create a new Production instance
1500         pnumber  = len(self.Productions)
1501         if not prodname in self.Nonterminals:
1502             self.Nonterminals[prodname] = [ ]
1503
1504         # Add the production number to Terminals and Nonterminals
1505         for t in syms:
1506             if t in self.Terminals:
1507                 self.Terminals[t].append(pnumber)
1508             else:
1509                 if not t in self.Nonterminals:
1510                     self.Nonterminals[t] = [ ]
1511                 self.Nonterminals[t].append(pnumber)
1512
1513         # Create a production and add it to the list of productions
1514         p = Production(pnumber,prodname,syms,prodprec,func,file,line)
1515         self.Productions.append(p)
1516         self.Prodmap[map] = p
1517
1518         # Add to the global productions list
1519         try:
1520             self.Prodnames[prodname].append(p)
1521         except KeyError:
1522             self.Prodnames[prodname] = [ p ]
1523         return 0
1524
1525     # -----------------------------------------------------------------------------
1526     # set_start()
1527     #
1528     # Sets the starting symbol and creates the augmented grammar.  Production
1529     # rule 0 is S' -> start where start is the start symbol.
1530     # -----------------------------------------------------------------------------
1531
1532     def set_start(self,start=None):
1533         if not start:
1534             start = self.Productions[1].name
1535         if start not in self.Nonterminals:
1536             raise GrammarError("start symbol %s undefined" % start)
1537         self.Productions[0] = Production(0,"S'",[start])
1538         self.Nonterminals[start].append(0)
1539         self.Start = start
1540
1541     # -----------------------------------------------------------------------------
1542     # find_unreachable()
1543     #
1544     # Find all of the nonterminal symbols that can't be reached from the starting
1545     # symbol.  Returns a list of nonterminals that can't be reached.
1546     # -----------------------------------------------------------------------------
1547
1548     def find_unreachable(self):
1549
1550         # Mark all symbols that are reachable from a symbol s
1551         def mark_reachable_from(s):
1552             if reachable[s]:
1553                 # We've already reached symbol s.
1554                 return
1555             reachable[s] = 1
1556             for p in self.Prodnames.get(s,[]):
1557                 for r in p.prod:
1558                     mark_reachable_from(r)
1559
1560         reachable   = { }
1561         for s in list(self.Terminals) + list(self.Nonterminals):
1562             reachable[s] = 0
1563
1564         mark_reachable_from( self.Productions[0].prod[0] )
1565
1566         return [s for s in list(self.Nonterminals)
1567                         if not reachable[s]]
1568
1569     # -----------------------------------------------------------------------------
1570     # infinite_cycles()
1571     #
1572     # This function looks at the various parsing rules and tries to detect
1573     # infinite recursion cycles (grammar rules where there is no possible way
1574     # to derive a string of only terminals).
1575     # -----------------------------------------------------------------------------
1576
1577     def infinite_cycles(self):
1578         terminates = {}
1579
1580         # Terminals:
1581         for t in self.Terminals:
1582             terminates[t] = 1
1583
1584         terminates['$end'] = 1
1585
1586         # Nonterminals:
1587
1588         # Initialize to false:
1589         for n in self.Nonterminals:
1590             terminates[n] = 0
1591
1592         # Then propagate termination until no change:
1593         while 1:
1594             some_change = 0
1595             for (n,pl) in self.Prodnames.items():
1596                 # Nonterminal n terminates iff any of its productions terminates.
1597                 for p in pl:
1598                     # Production p terminates iff all of its rhs symbols terminate.
1599                     for s in p.prod:
1600                         if not terminates[s]:
1601                             # The symbol s does not terminate,
1602                             # so production p does not terminate.
1603                             p_terminates = 0
1604                             break
1605                     else:
1606                         # didn't break from the loop,
1607                         # so every symbol s terminates
1608                         # so production p terminates.
1609                         p_terminates = 1
1610
1611                     if p_terminates:
1612                         # symbol n terminates!
1613                         if not terminates[n]:
1614                             terminates[n] = 1
1615                             some_change = 1
1616                         # Don't need to consider any more productions for this n.
1617                         break
1618
1619             if not some_change:
1620                 break
1621
1622         infinite = []
1623         for (s,term) in terminates.items():
1624             if not term:
1625                 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1626                     # s is used-but-not-defined, and we've already warned of that,
1627                     # so it would be overkill to say that it's also non-terminating.
1628                     pass
1629                 else:
1630                     infinite.append(s)
1631
1632         return infinite
1633
1634
1635     # -----------------------------------------------------------------------------
1636     # undefined_symbols()
1637     #
1638     # Find all symbols that were used the grammar, but not defined as tokens or
1639     # grammar rules.  Returns a list of tuples (sym, prod) where sym in the symbol
1640     # and prod is the production where the symbol was used.
1641     # -----------------------------------------------------------------------------
1642     def undefined_symbols(self):
1643         result = []
1644         for p in self.Productions:
1645             if not p: continue
1646
1647             for s in p.prod:
1648                 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1649                     result.append((s,p))
1650         return result
1651
1652     # -----------------------------------------------------------------------------
1653     # unused_terminals()
1654     #
1655     # Find all terminals that were defined, but not used by the grammar.  Returns
1656     # a list of all symbols.
1657     # -----------------------------------------------------------------------------
1658     def unused_terminals(self):
1659         unused_tok = []
1660         for s,v in self.Terminals.items():
1661             if s != 'error' and not v:
1662                 unused_tok.append(s)
1663
1664         return unused_tok
1665
1666     # ------------------------------------------------------------------------------
1667     # unused_rules()
1668     #
1669     # Find all grammar rules that were defined,  but not used (maybe not reachable)
1670     # Returns a list of productions.
1671     # ------------------------------------------------------------------------------
1672
1673     def unused_rules(self):
1674         unused_prod = []
1675         for s,v in self.Nonterminals.items():
1676             if not v:
1677                 p = self.Prodnames[s][0]
1678                 unused_prod.append(p)
1679         return unused_prod
1680
1681     # -----------------------------------------------------------------------------
1682     # unused_precedence()
1683     #
1684     # Returns a list of tuples (term,precedence) corresponding to precedence
1685     # rules that were never used by the grammar.  term is the name of the terminal
1686     # on which precedence was applied and precedence is a string such as 'left' or
1687     # 'right' corresponding to the type of precedence.
1688     # -----------------------------------------------------------------------------
1689
1690     def unused_precedence(self):
1691         unused = []
1692         for termname in self.Precedence:
1693             if not (termname in self.Terminals or termname in self.UsedPrecedence):
1694                 unused.append((termname,self.Precedence[termname][0]))
1695
1696         return unused
1697
1698     # -------------------------------------------------------------------------
1699     # _first()
1700     #
1701     # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1702     #
1703     # During execution of compute_first1, the result may be incomplete.
1704     # Afterward (e.g., when called from compute_follow()), it will be complete.
1705     # -------------------------------------------------------------------------
1706     def _first(self,beta):
1707
1708         # We are computing First(x1,x2,x3,...,xn)
1709         result = [ ]
1710         for x in beta:
1711             x_produces_empty = 0
1712
1713             # Add all the non-<empty> symbols of First[x] to the result.
1714             for f in self.First[x]:
1715                 if f == '<empty>':
1716                     x_produces_empty = 1
1717                 else:
1718                     if f not in result: result.append(f)
1719
1720             if x_produces_empty:
1721                 # We have to consider the next x in beta,
1722                 # i.e. stay in the loop.
1723                 pass
1724             else:
1725                 # We don't have to consider any further symbols in beta.
1726                 break
1727         else:
1728             # There was no 'break' from the loop,
1729             # so x_produces_empty was true for all x in beta,
1730             # so beta produces empty as well.
1731             result.append('<empty>')
1732
1733         return result
1734
1735     # -------------------------------------------------------------------------
1736     # compute_first()
1737     #
1738     # Compute the value of FIRST1(X) for all symbols
1739     # -------------------------------------------------------------------------
1740     def compute_first(self):
1741         if self.First:
1742             return self.First
1743
1744         # Terminals:
1745         for t in self.Terminals:
1746             self.First[t] = [t]
1747
1748         self.First['$end'] = ['$end']
1749
1750         # Nonterminals:
1751
1752         # Initialize to the empty set:
1753         for n in self.Nonterminals:
1754             self.First[n] = []
1755
1756         # Then propagate symbols until no change:
1757         while 1:
1758             some_change = 0
1759             for n in self.Nonterminals:
1760                 for p in self.Prodnames[n]:
1761                     for f in self._first(p.prod):
1762                         if f not in self.First[n]:
1763                             self.First[n].append( f )
1764                             some_change = 1
1765             if not some_change:
1766                 break
1767
1768         return self.First
1769
1770     # ---------------------------------------------------------------------
1771     # compute_follow()
1772     #
1773     # Computes all of the follow sets for every non-terminal symbol.  The
1774     # follow set is the set of all symbols that might follow a given
1775     # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
1776     # ---------------------------------------------------------------------
1777     def compute_follow(self,start=None):
1778         # If already computed, return the result
1779         if self.Follow:
1780             return self.Follow
1781
1782         # If first sets not computed yet, do that first.
1783         if not self.First:
1784             self.compute_first()
1785
1786         # Add '$end' to the follow list of the start symbol
1787         for k in self.Nonterminals:
1788             self.Follow[k] = [ ]
1789
1790         if not start:
1791             start = self.Productions[1].name
1792
1793         self.Follow[start] = [ '$end' ]
1794
1795         while 1:
1796             didadd = 0
1797             for p in self.Productions[1:]:
1798                 # Here is the production set
1799                 for i in range(len(p.prod)):
1800                     B = p.prod[i]
1801                     if B in self.Nonterminals:
1802                         # Okay. We got a non-terminal in a production
1803                         fst = self._first(p.prod[i+1:])
1804                         hasempty = 0
1805                         for f in fst:
1806                             if f != '<empty>' and f not in self.Follow[B]:
1807                                 self.Follow[B].append(f)
1808                                 didadd = 1
1809                             if f == '<empty>':
1810                                 hasempty = 1
1811                         if hasempty or i == (len(p.prod)-1):
1812                             # Add elements of follow(a) to follow(b)
1813                             for f in self.Follow[p.name]:
1814                                 if f not in self.Follow[B]:
1815                                     self.Follow[B].append(f)
1816                                     didadd = 1
1817             if not didadd: break
1818         return self.Follow
1819
1820
1821     # -----------------------------------------------------------------------------
1822     # build_lritems()
1823     #
1824     # This function walks the list of productions and builds a complete set of the
1825     # LR items.  The LR items are stored in two ways:  First, they are uniquely
1826     # numbered and placed in the list _lritems.  Second, a linked list of LR items
1827     # is built for each production.  For example:
1828     #
1829     #   E -> E PLUS E
1830     #
1831     # Creates the list
1832     #
1833     #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1834     # -----------------------------------------------------------------------------
1835
1836     def build_lritems(self):
1837         for p in self.Productions:
1838             lastlri = p
1839             i = 0
1840             lr_items = []
1841             while 1:
1842                 if i > len(p):
1843                     lri = None
1844                 else:
1845                     lri = LRItem(p,i)
1846                     # Precompute the list of productions immediately following
1847                     try:
1848                         lri.lr_after = self.Prodnames[lri.prod[i+1]]
1849                     except (IndexError,KeyError):
1850                         lri.lr_after = []
1851                     try:
1852                         lri.lr_before = lri.prod[i-1]
1853                     except IndexError:
1854                         lri.lr_before = None
1855
1856                 lastlri.lr_next = lri
1857                 if not lri: break
1858                 lr_items.append(lri)
1859                 lastlri = lri
1860                 i += 1
1861             p.lr_items = lr_items
1862
1863 # -----------------------------------------------------------------------------
1864 #                            == Class LRTable ==
1865 #
1866 # This basic class represents a basic table of LR parsing information.
1867 # Methods for generating the tables are not defined here.  They are defined
1868 # in the derived class LRGeneratedTable.
1869 # -----------------------------------------------------------------------------
1870
1871 class VersionError(YaccError): pass
1872
1873 class LRTable(object):
1874     def __init__(self):
1875         self.lr_action = None
1876         self.lr_goto = None
1877         self.lr_productions = None
1878         self.lr_method = None
1879
1880     def read_table(self,module):
1881         if isinstance(module,types.ModuleType):
1882             parsetab = module
1883         else:
1884             if sys.version_info[0] < 3:
1885                 exec("import %s as parsetab" % module)
1886             else:
1887                 env = { }
1888                 exec("import %s as parsetab" % module, env, env)
1889                 parsetab = env['parsetab']
1890
1891         if parsetab._tabversion != __tabversion__:
1892             raise VersionError("yacc table file version is out of date")
1893
1894         self.lr_action = parsetab._lr_action
1895         self.lr_goto = parsetab._lr_goto
1896
1897         self.lr_productions = []
1898         for p in parsetab._lr_productions:
1899             self.lr_productions.append(MiniProduction(*p))
1900
1901         self.lr_method = parsetab._lr_method
1902         return parsetab._lr_signature
1903
1904     def read_pickle(self,filename):
1905         try:
1906             import cPickle as pickle
1907         except ImportError:
1908             import pickle
1909
1910         in_f = open(filename,"rb")
1911
1912         tabversion = pickle.load(in_f)
1913         if tabversion != __tabversion__:
1914             raise VersionError("yacc table file version is out of date")
1915         self.lr_method = pickle.load(in_f)
1916         signature      = pickle.load(in_f)
1917         self.lr_action = pickle.load(in_f)
1918         self.lr_goto   = pickle.load(in_f)
1919         productions    = pickle.load(in_f)
1920
1921         self.lr_productions = []
1922         for p in productions:
1923             self.lr_productions.append(MiniProduction(*p))
1924
1925         in_f.close()
1926         return signature
1927
1928     # Bind all production function names to callable objects in pdict
1929     def bind_callables(self,pdict):
1930         for p in self.lr_productions:
1931             p.bind(pdict)
1932
1933 # -----------------------------------------------------------------------------
1934 #                           === LR Generator ===
1935 #
1936 # The following classes and functions are used to generate LR parsing tables on
1937 # a grammar.
1938 # -----------------------------------------------------------------------------
1939
1940 # -----------------------------------------------------------------------------
1941 # digraph()
1942 # traverse()
1943 #
1944 # The following two functions are used to compute set valued functions
1945 # of the form:
1946 #
1947 #     F(x) = F'(x) U U{F(y) | x R y}
1948 #
1949 # This is used to compute the values of Read() sets as well as FOLLOW sets
1950 # in LALR(1) generation.
1951 #
1952 # Inputs:  X    - An input set
1953 #          R    - A relation
1954 #          FP   - Set-valued function
1955 # ------------------------------------------------------------------------------
1956
1957 def digraph(X,R,FP):
1958     N = { }
1959     for x in X:
1960        N[x] = 0
1961     stack = []
1962     F = { }
1963     for x in X:
1964         if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
1965     return F
1966
1967 def traverse(x,N,stack,F,X,R,FP):
1968     stack.append(x)
1969     d = len(stack)
1970     N[x] = d
1971     F[x] = FP(x)             # F(X) <- F'(x)
1972
1973     rel = R(x)               # Get y's related to x
1974     for y in rel:
1975         if N[y] == 0:
1976              traverse(y,N,stack,F,X,R,FP)
1977         N[x] = min(N[x],N[y])
1978         for a in F.get(y,[]):
1979             if a not in F[x]: F[x].append(a)
1980     if N[x] == d:
1981        N[stack[-1]] = MAXINT
1982        F[stack[-1]] = F[x]
1983        element = stack.pop()
1984        while element != x:
1985            N[stack[-1]] = MAXINT
1986            F[stack[-1]] = F[x]
1987            element = stack.pop()
1988
1989 class LALRError(YaccError): pass
1990
1991 # -----------------------------------------------------------------------------
1992 #                             == LRGeneratedTable ==
1993 #
1994 # This class implements the LR table generation algorithm.  There are no
1995 # public methods except for write()
1996 # -----------------------------------------------------------------------------
1997
1998 class LRGeneratedTable(LRTable):
1999     def __init__(self,grammar,method='LALR',log=None):
2000         if method not in ['SLR','LALR']:
2001             raise LALRError("Unsupported method %s" % method)
2002
2003         self.grammar = grammar
2004         self.lr_method = method
2005
2006         # Set up the logger
2007         if not log:
2008             log = NullLogger()
2009         self.log = log
2010
2011         # Internal attributes
2012         self.lr_action     = {}        # Action table
2013         self.lr_goto       = {}        # Goto table
2014         self.lr_productions  = grammar.Productions    # Copy of grammar Production array
2015         self.lr_goto_cache = {}        # Cache of computed gotos
2016         self.lr0_cidhash   = {}        # Cache of closures
2017
2018         self._add_count    = 0         # Internal counter used to detect cycles
2019
2020         # Diagonistic information filled in by the table generator
2021         self.sr_conflict   = 0
2022         self.rr_conflict   = 0
2023         self.conflicts     = []        # List of conflicts
2024
2025         self.sr_conflicts  = []
2026         self.rr_conflicts  = []
2027
2028         # Build the tables
2029         self.grammar.build_lritems()
2030         self.grammar.compute_first()
2031         self.grammar.compute_follow()
2032         self.lr_parse_table()
2033
2034     # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2035
2036     def lr0_closure(self,I):
2037         self._add_count += 1
2038
2039         # Add everything in I to J
2040         J = I[:]
2041         didadd = 1
2042         while didadd:
2043             didadd = 0
2044             for j in J:
2045                 for x in j.lr_after:
2046                     if getattr(x,"lr0_added",0) == self._add_count: continue
2047                     # Add B --> .G to J
2048                     J.append(x.lr_next)
2049                     x.lr0_added = self._add_count
2050                     didadd = 1
2051
2052         return J
2053
2054     # Compute the LR(0) goto function goto(I,X) where I is a set
2055     # of LR(0) items and X is a grammar symbol.   This function is written
2056     # in a way that guarantees uniqueness of the generated goto sets
2057     # (i.e. the same goto set will never be returned as two different Python
2058     # objects).  With uniqueness, we can later do fast set comparisons using
2059     # id(obj) instead of element-wise comparison.
2060
2061     def lr0_goto(self,I,x):
2062         # First we look for a previously cached entry
2063         g = self.lr_goto_cache.get((id(I),x))
2064         if g: return g
2065
2066         # Now we generate the goto set in a way that guarantees uniqueness
2067         # of the result
2068
2069         s = self.lr_goto_cache.get(x)
2070         if not s:
2071             s = { }
2072             self.lr_goto_cache[x] = s
2073
2074         gs = [ ]
2075         for p in I:
2076             n = p.lr_next
2077             if n and n.lr_before == x:
2078                 s1 = s.get(id(n))
2079                 if not s1:
2080                     s1 = { }
2081                     s[id(n)] = s1
2082                 gs.append(n)
2083                 s = s1
2084         g = s.get('$end')
2085         if not g:
2086             if gs:
2087                 g = self.lr0_closure(gs)
2088                 s['$end'] = g
2089             else:
2090                 s['$end'] = gs
2091         self.lr_goto_cache[(id(I),x)] = g
2092         return g
2093
2094     # Compute the LR(0) sets of item function
2095     def lr0_items(self):
2096
2097         C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
2098         i = 0
2099         for I in C:
2100             self.lr0_cidhash[id(I)] = i
2101             i += 1
2102
2103         # Loop over the items in C and each grammar symbols
2104         i = 0
2105         while i < len(C):
2106             I = C[i]
2107             i += 1
2108
2109             # Collect all of the symbols that could possibly be in the goto(I,X) sets
2110             asyms = { }
2111             for ii in I:
2112                 for s in ii.usyms:
2113                     asyms[s] = None
2114
2115             for x in asyms:
2116                 g = self.lr0_goto(I,x)
2117                 if not g:  continue
2118                 if id(g) in self.lr0_cidhash: continue
2119                 self.lr0_cidhash[id(g)] = len(C)
2120                 C.append(g)
2121
2122         return C
2123
2124     # -----------------------------------------------------------------------------
2125     #                       ==== LALR(1) Parsing ====
2126     #
2127     # LALR(1) parsing is almost exactly the same as SLR except that instead of
2128     # relying upon Follow() sets when performing reductions, a more selective
2129     # lookahead set that incorporates the state of the LR(0) machine is utilized.
2130     # Thus, we mainly just have to focus on calculating the lookahead sets.
2131     #
2132     # The method used here is due to DeRemer and Pennelo (1982).
2133     #
2134     # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
2135     #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2136     #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
2137     #
2138     # Further details can also be found in:
2139     #
2140     #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2141     #      McGraw-Hill Book Company, (1985).
2142     #
2143     # -----------------------------------------------------------------------------
2144
2145     # -----------------------------------------------------------------------------
2146     # compute_nullable_nonterminals()
2147     #
2148     # Creates a dictionary containing all of the non-terminals that might produce
2149     # an empty production.
2150     # -----------------------------------------------------------------------------
2151
2152     def compute_nullable_nonterminals(self):
2153         nullable = {}
2154         num_nullable = 0
2155         while 1:
2156            for p in self.grammar.Productions[1:]:
2157                if p.len == 0:
2158                     nullable[p.name] = 1
2159                     continue
2160                for t in p.prod:
2161                     if not t in nullable: break
2162                else:
2163                     nullable[p.name] = 1
2164            if len(nullable) == num_nullable: break
2165            num_nullable = len(nullable)
2166         return nullable
2167
2168     # -----------------------------------------------------------------------------
2169     # find_nonterminal_trans(C)
2170     #
2171     # Given a set of LR(0) items, this functions finds all of the non-terminal
2172     # transitions.    These are transitions in which a dot appears immediately before
2173     # a non-terminal.   Returns a list of tuples of the form (state,N) where state
2174     # is the state number and N is the nonterminal symbol.
2175     #
2176     # The input C is the set of LR(0) items.
2177     # -----------------------------------------------------------------------------
2178
2179     def find_nonterminal_transitions(self,C):
2180          trans = []
2181          for state in range(len(C)):
2182              for p in C[state]:
2183                  if p.lr_index < p.len - 1:
2184                       t = (state,p.prod[p.lr_index+1])
2185                       if t[1] in self.grammar.Nonterminals:
2186                             if t not in trans: trans.append(t)
2187              state = state + 1
2188          return trans
2189
2190     # -----------------------------------------------------------------------------
2191     # dr_relation()
2192     #
2193     # Computes the DR(p,A) relationships for non-terminal transitions.  The input
2194     # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2195     #
2196     # Returns a list of terminals.
2197     # -----------------------------------------------------------------------------
2198
2199     def dr_relation(self,C,trans,nullable):
2200         dr_set = { }
2201         state,N = trans
2202         terms = []
2203
2204         g = self.lr0_goto(C[state],N)
2205         for p in g:
2206            if p.lr_index < p.len - 1:
2207                a = p.prod[p.lr_index+1]
2208                if a in self.grammar.Terminals:
2209                    if a not in terms: terms.append(a)
2210
2211         # This extra bit is to handle the start state
2212         if state == 0 and N == self.grammar.Productions[0].prod[0]:
2213            terms.append('$end')
2214
2215         return terms
2216
2217     # -----------------------------------------------------------------------------
2218     # reads_relation()
2219     #
2220     # Computes the READS() relation (p,A) READS (t,C).
2221     # -----------------------------------------------------------------------------
2222
2223     def reads_relation(self,C, trans, empty):
2224         # Look for empty transitions
2225         rel = []
2226         state, N = trans
2227
2228         g = self.lr0_goto(C[state],N)
2229         j = self.lr0_cidhash.get(id(g),-1)
2230         for p in g:
2231             if p.lr_index < p.len - 1:
2232                  a = p.prod[p.lr_index + 1]
2233                  if a in empty:
2234                       rel.append((j,a))
2235
2236         return rel
2237
2238     # -----------------------------------------------------------------------------
2239     # compute_lookback_includes()
2240     #
2241     # Determines the lookback and includes relations
2242     #
2243     # LOOKBACK:
2244     #
2245     # This relation is determined by running the LR(0) state machine forward.
2246     # For example, starting with a production "N : . A B C", we run it forward
2247     # to obtain "N : A B C ."   We then build a relationship between this final
2248     # state and the starting state.   These relationships are stored in a dictionary
2249     # lookdict.
2250     #
2251     # INCLUDES:
2252     #
2253     # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2254     #
2255     # This relation is used to determine non-terminal transitions that occur
2256     # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
2257     # if the following holds:
2258     #
2259     #       B -> LAT, where T -> epsilon and p' -L-> p
2260     #
2261     # L is essentially a prefix (which may be empty), T is a suffix that must be
2262     # able to derive an empty string.  State p' must lead to state p with the string L.
2263     #
2264     # -----------------------------------------------------------------------------
2265
2266     def compute_lookback_includes(self,C,trans,nullable):
2267
2268         lookdict = {}          # Dictionary of lookback relations
2269         includedict = {}       # Dictionary of include relations
2270
2271         # Make a dictionary of non-terminal transitions
2272         dtrans = {}
2273         for t in trans:
2274             dtrans[t] = 1
2275
2276         # Loop over all transitions and compute lookbacks and includes
2277         for state,N in trans:
2278             lookb = []
2279             includes = []
2280             for p in C[state]:
2281                 if p.name != N: continue
2282
2283                 # Okay, we have a name match.  We now follow the production all the way
2284                 # through the state machine until we get the . on the right hand side
2285
2286                 lr_index = p.lr_index
2287                 j = state
2288                 while lr_index < p.len - 1:
2289                      lr_index = lr_index + 1
2290                      t = p.prod[lr_index]
2291
2292                      # Check to see if this symbol and state are a non-terminal transition
2293                      if (j,t) in dtrans:
2294                            # Yes.  Okay, there is some chance that this is an includes relation
2295                            # the only way to know for certain is whether the rest of the
2296                            # production derives empty
2297
2298                            li = lr_index + 1
2299                            while li < p.len:
2300                                 if p.prod[li] in self.grammar.Terminals: break      # No forget it
2301                                 if not p.prod[li] in nullable: break
2302                                 li = li + 1
2303                            else:
2304                                 # Appears to be a relation between (j,t) and (state,N)
2305                                 includes.append((j,t))
2306
2307                      g = self.lr0_goto(C[j],t)               # Go to next set
2308                      j = self.lr0_cidhash.get(id(g),-1)     # Go to next state
2309
2310                 # When we get here, j is the final state, now we have to locate the production
2311                 for r in C[j]:
2312                      if r.name != p.name: continue
2313                      if r.len != p.len:   continue
2314                      i = 0
2315                      # This look is comparing a production ". A B C" with "A B C ."
2316                      while i < r.lr_index:
2317                           if r.prod[i] != p.prod[i+1]: break
2318                           i = i + 1
2319                      else:
2320                           lookb.append((j,r))
2321             for i in includes:
2322                  if not i in includedict: includedict[i] = []
2323                  includedict[i].append((state,N))
2324             lookdict[(state,N)] = lookb
2325
2326         return lookdict,includedict
2327
2328     # -----------------------------------------------------------------------------
2329     # compute_read_sets()
2330     #
2331     # Given a set of LR(0) items, this function computes the read sets.
2332     #
2333     # Inputs:  C        =  Set of LR(0) items
2334     #          ntrans   = Set of nonterminal transitions
2335     #          nullable = Set of empty transitions
2336     #
2337     # Returns a set containing the read sets
2338     # -----------------------------------------------------------------------------
2339
2340     def compute_read_sets(self,C, ntrans, nullable):
2341         FP = lambda x: self.dr_relation(C,x,nullable)
2342         R =  lambda x: self.reads_relation(C,x,nullable)
2343         F = digraph(ntrans,R,FP)
2344         return F
2345
2346     # -----------------------------------------------------------------------------
2347     # compute_follow_sets()
2348     #
2349     # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2350     # and an include set, this function computes the follow sets
2351     #
2352     # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2353     #
2354     # Inputs:
2355     #            ntrans     = Set of nonterminal transitions
2356     #            readsets   = Readset (previously computed)
2357     #            inclsets   = Include sets (previously computed)
2358     #
2359     # Returns a set containing the follow sets
2360     # -----------------------------------------------------------------------------
2361
2362     def compute_follow_sets(self,ntrans,readsets,inclsets):
2363          FP = lambda x: readsets[x]
2364          R  = lambda x: inclsets.get(x,[])
2365          F = digraph(ntrans,R,FP)
2366          return F
2367
2368     # -----------------------------------------------------------------------------
2369     # add_lookaheads()
2370     #
2371     # Attaches the lookahead symbols to grammar rules.
2372     #
2373     # Inputs:    lookbacks         -  Set of lookback relations
2374     #            followset         -  Computed follow set
2375     #
2376     # This function directly attaches the lookaheads to productions contained
2377     # in the lookbacks set
2378     # -----------------------------------------------------------------------------
2379
2380     def add_lookaheads(self,lookbacks,followset):
2381         for trans,lb in lookbacks.items():
2382             # Loop over productions in lookback
2383             for state,p in lb:
2384                  if not state in p.lookaheads:
2385                       p.lookaheads[state] = []
2386                  f = followset.get(trans,[])
2387                  for a in f:
2388                       if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2389
2390     # -----------------------------------------------------------------------------
2391     # add_lalr_lookaheads()
2392     #
2393     # This function does all of the work of adding lookahead information for use
2394     # with LALR parsing
2395     # -----------------------------------------------------------------------------
2396
2397     def add_lalr_lookaheads(self,C):
2398         # Determine all of the nullable nonterminals
2399         nullable = self.compute_nullable_nonterminals()
2400
2401         # Find all non-terminal transitions
2402         trans = self.find_nonterminal_transitions(C)
2403
2404         # Compute read sets
2405         readsets = self.compute_read_sets(C,trans,nullable)
2406
2407         # Compute lookback/includes relations
2408         lookd, included = self.compute_lookback_includes(C,trans,nullable)
2409
2410         # Compute LALR FOLLOW sets
2411         followsets = self.compute_follow_sets(trans,readsets,included)
2412
2413         # Add all of the lookaheads
2414         self.add_lookaheads(lookd,followsets)
2415
2416     # -----------------------------------------------------------------------------
2417     # lr_parse_table()
2418     #
2419     # This function constructs the parse tables for SLR or LALR
2420     # -----------------------------------------------------------------------------
2421     def lr_parse_table(self):
2422         Productions = self.grammar.Productions
2423         Precedence  = self.grammar.Precedence
2424         goto   = self.lr_goto         # Goto array
2425         action = self.lr_action       # Action array
2426         log    = self.log             # Logger for output
2427
2428         actionp = { }                 # Action production array (temporary)
2429
2430         log.info("Parsing method: %s", self.lr_method)
2431
2432         # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2433         # This determines the number of states
2434
2435         C = self.lr0_items()
2436
2437         if self.lr_method == 'LALR':
2438             self.add_lalr_lookaheads(C)
2439
2440         # Build the parser table, state by state
2441         st = 0
2442         for I in C:
2443             # Loop over each production in I
2444             actlist = [ ]              # List of actions
2445             st_action  = { }
2446             st_actionp = { }
2447             st_goto    = { }
2448             log.info("")
2449             log.info("state %d", st)
2450             log.info("")
2451             for p in I:
2452                 log.info("    (%d) %s", p.number, str(p))
2453             log.info("")
2454
2455             for p in I:
2456                     if p.len == p.lr_index + 1:
2457                         if p.name == "S'":
2458                             # Start symbol. Accept!
2459                             st_action["$end"] = 0
2460                             st_actionp["$end"] = p
2461                         else:
2462                             # We are at the end of a production.  Reduce!
2463                             if self.lr_method == 'LALR':
2464                                 laheads = p.lookaheads[st]
2465                             else:
2466                                 laheads = self.grammar.Follow[p.name]
2467                             for a in laheads:
2468                                 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
2469                                 r = st_action.get(a)
2470                                 if r is not None:
2471                                     # Whoa. Have a shift/reduce or reduce/reduce conflict
2472                                     if r > 0:
2473                                         # Need to decide on shift or reduce here
2474                                         # By default we favor shifting. Need to add
2475                                         # some precedence rules here.
2476                                         sprec,slevel = Productions[st_actionp[a].number].prec
2477                                         rprec,rlevel = Precedence.get(a,('right',0))
2478                                         if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2479                                             # We really need to reduce here.
2480                                             st_action[a] = -p.number
2481                                             st_actionp[a] = p
2482                                             if not slevel and not rlevel:
2483                                                 log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
2484                                                 self.sr_conflicts.append((st,a,'reduce'))
2485                                             Productions[p.number].reduced += 1
2486                                         elif (slevel == rlevel) and (rprec == 'nonassoc'):
2487                                             st_action[a] = None
2488                                         else:
2489                                             # Hmmm. Guess we'll keep the shift
2490                                             if not rlevel:
2491                                                 log.info("  ! shift/reduce conflict for %s resolved as shift",a)
2492                                                 self.sr_conflicts.append((st,a,'shift'))
2493                                     elif r < 0:
2494                                         # Reduce/reduce conflict.   In this case, we favor the rule
2495                                         # that was defined first in the grammar file
2496                                         oldp = Productions[-r]
2497                                         pp = Productions[p.number]
2498                                         if oldp.line > pp.line:
2499                                             st_action[a] = -p.number
2500                                             st_actionp[a] = p
2501                                             chosenp,rejectp = pp,oldp
2502                                             Productions[p.number].reduced += 1
2503                                             Productions[oldp.number].reduced -= 1
2504                                         else:
2505                                             chosenp,rejectp = oldp,pp
2506                                         self.rr_conflicts.append((st,chosenp,rejectp))
2507                                         log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
2508                                     else:
2509                                         raise LALRError("Unknown conflict in state %d" % st)
2510                                 else:
2511                                     st_action[a] = -p.number
2512                                     st_actionp[a] = p
2513                                     Productions[p.number].reduced += 1
2514                     else:
2515                         i = p.lr_index
2516                         a = p.prod[i+1]       # Get symbol right after the "."
2517                         if a in self.grammar.Terminals:
2518                             g = self.lr0_goto(I,a)
2519                             j = self.lr0_cidhash.get(id(g),-1)
2520                             if j >= 0:
2521                                 # We are in a shift state
2522                                 actlist.append((a,p,"shift and go to state %d" % j))
2523                                 r = st_action.get(a)
2524                                 if r is not None:
2525                                     # Whoa have a shift/reduce or shift/shift conflict
2526                                     if r > 0:
2527                                         if r != j:
2528                                             raise LALRError("Shift/shift conflict in state %d" % st)
2529                                     elif r < 0:
2530                                         # Do a precedence check.
2531                                         #   -  if precedence of reduce rule is higher, we reduce.
2532                                         #   -  if precedence of reduce is same and left assoc, we reduce.
2533                                         #   -  otherwise we shift
2534                                         rprec,rlevel = Productions[st_actionp[a].number].prec
2535                                         sprec,slevel = Precedence.get(a,('right',0))
2536                                         if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2537                                             # We decide to shift here... highest precedence to shift
2538                                             Productions[st_actionp[a].number].reduced -= 1
2539                                             st_action[a] = j
2540                                             st_actionp[a] = p
2541                                             if not rlevel:
2542                                                 log.info("  ! shift/reduce conflict for %s resolved as shift",a)
2543                                                 self.sr_conflicts.append((st,a,'shift'))
2544                                         elif (slevel == rlevel) and (rprec == 'nonassoc'):
2545                                             st_action[a] = None
2546                                         else:
2547                                             # Hmmm. Guess we'll keep the reduce
2548                                             if not slevel and not rlevel:
2549                                                 log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
2550                                                 self.sr_conflicts.append((st,a,'reduce'))
2551
2552                                     else:
2553                                         raise LALRError("Unknown conflict in state %d" % st)
2554                                 else:
2555                                     st_action[a] = j
2556                                     st_actionp[a] = p
2557
2558             # Print the actions associated with each terminal
2559             _actprint = { }
2560             for a,p,m in actlist:
2561                 if a in st_action:
2562                     if p is st_actionp[a]:
2563                         log.info("    %-15s %s",a,m)
2564                         _actprint[(a,m)] = 1
2565             log.info("")
2566             # Print the actions that were not used. (debugging)
2567             not_used = 0
2568             for a,p,m in actlist:
2569                 if a in st_action:
2570                     if p is not st_actionp[a]:
2571                         if not (a,m) in _actprint:
2572                             log.debug("  ! %-15s [ %s ]",a,m)
2573                             not_used = 1
2574                             _actprint[(a,m)] = 1
2575             if not_used:
2576                 log.debug("")
2577
2578             # Construct the goto table for this state
2579
2580             nkeys = { }
2581             for ii in I:
2582                 for s in ii.usyms:
2583                     if s in self.grammar.Nonterminals:
2584                         nkeys[s] = None
2585             for n in nkeys:
2586                 g = self.lr0_goto(I,n)
2587                 j = self.lr0_cidhash.get(id(g),-1)
2588                 if j >= 0:
2589                     st_goto[n] = j
2590                     log.info("    %-30s shift and go to state %d",n,j)
2591
2592             action[st] = st_action
2593             actionp[st] = st_actionp
2594             goto[st] = st_goto
2595             st += 1
2596
2597
2598     # -----------------------------------------------------------------------------
2599     # write()
2600     #
2601     # This function writes the LR parsing tables to a file
2602     # -----------------------------------------------------------------------------
2603
2604     def write_table(self,modulename,outputdir='',signature=""):
2605         basemodulename = modulename.split(".")[-1]
2606         filename = os.path.join(outputdir,basemodulename) + ".py"
2607         try:
2608             f = open(filename,"w")
2609
2610             f.write("""
2611 # %s
2612 # This file is automatically generated. Do not edit.
2613 _tabversion = %r
2614
2615 _lr_method = %r
2616
2617 _lr_signature = %r
2618     """ % (filename, __tabversion__, self.lr_method, signature))
2619
2620             # Change smaller to 0 to go back to original tables
2621             smaller = 1
2622
2623             # Factor out names to try and make smaller
2624             if smaller:
2625                 items = { }
2626
2627                 for s,nd in self.lr_action.items():
2628                    for name,v in nd.items():
2629                       i = items.get(name)
2630                       if not i:
2631                          i = ([],[])
2632                          items[name] = i
2633                       i[0].append(s)
2634                       i[1].append(v)
2635
2636                 f.write("\n_lr_action_items = {")
2637                 for k,v in items.items():
2638                     f.write("%r:([" % k)
2639                     for i in v[0]:
2640                         f.write("%r," % i)
2641                     f.write("],[")
2642                     for i in v[1]:
2643                         f.write("%r," % i)
2644
2645                     f.write("]),")
2646                 f.write("}\n")
2647
2648                 f.write("""
2649 _lr_action = { }
2650 for _k, _v in _lr_action_items.items():
2651    for _x,_y in zip(_v[0],_v[1]):
2652       if not _x in _lr_action:  _lr_action[_x] = { }
2653       _lr_action[_x][_k] = _y
2654 del _lr_action_items
2655 """)
2656
2657             else:
2658                 f.write("\n_lr_action = { ");
2659                 for k,v in self.lr_action.items():
2660                     f.write("(%r,%r):%r," % (k[0],k[1],v))
2661                 f.write("}\n");
2662
2663             if smaller:
2664                 # Factor out names to try and make smaller
2665                 items = { }
2666
2667                 for s,nd in self.lr_goto.items():
2668                    for name,v in nd.items():
2669                       i = items.get(name)
2670                       if not i:
2671                          i = ([],[])
2672                          items[name] = i
2673                       i[0].append(s)
2674                       i[1].append(v)
2675
2676                 f.write("\n_lr_goto_items = {")
2677                 for k,v in items.items():
2678                     f.write("%r:([" % k)
2679                     for i in v[0]:
2680                         f.write("%r," % i)
2681                     f.write("],[")
2682                     for i in v[1]:
2683                         f.write("%r," % i)
2684
2685                     f.write("]),")
2686                 f.write("}\n")
2687
2688                 f.write("""
2689 _lr_goto = { }
2690 for _k, _v in _lr_goto_items.items():
2691    for _x,_y in zip(_v[0],_v[1]):
2692        if not _x in _lr_goto: _lr_goto[_x] = { }
2693        _lr_goto[_x][_k] = _y
2694 del _lr_goto_items
2695 """)
2696             else:
2697                 f.write("\n_lr_goto = { ");
2698                 for k,v in self.lr_goto.items():
2699                     f.write("(%r,%r):%r," % (k[0],k[1],v))
2700                 f.write("}\n");
2701
2702             # Write production table
2703             f.write("_lr_productions = [\n")
2704             for p in self.lr_productions:
2705                 if p.func:
2706                     f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
2707                 else:
2708                     f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
2709             f.write("]\n")
2710             f.close()
2711
2712         except IOError:
2713             e = sys.exc_info()[1]
2714             sys.stderr.write("Unable to create %r\n" % filename)
2715             sys.stderr.write(str(e)+"\n")
2716             return
2717
2718
2719     # -----------------------------------------------------------------------------
2720     # pickle_table()
2721     #
2722     # This function pickles the LR parsing tables to a supplied file object
2723     # -----------------------------------------------------------------------------
2724
2725     def pickle_table(self,filename,signature=""):
2726         try:
2727             import cPickle as pickle
2728         except ImportError:
2729             import pickle
2730         outf = open(filename,"wb")
2731         pickle.dump(__tabversion__,outf,pickle_protocol)
2732         pickle.dump(self.lr_method,outf,pickle_protocol)
2733         pickle.dump(signature,outf,pickle_protocol)
2734         pickle.dump(self.lr_action,outf,pickle_protocol)
2735         pickle.dump(self.lr_goto,outf,pickle_protocol)
2736
2737         outp = []
2738         for p in self.lr_productions:
2739             if p.func:
2740                 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
2741             else:
2742                 outp.append((str(p),p.name,p.len,None,None,None))
2743         pickle.dump(outp,outf,pickle_protocol)
2744         outf.close()
2745
2746 # -----------------------------------------------------------------------------
2747 #                            === INTROSPECTION ===
2748 #
2749 # The following functions and classes are used to implement the PLY
2750 # introspection features followed by the yacc() function itself.
2751 # -----------------------------------------------------------------------------
2752
2753 # -----------------------------------------------------------------------------
2754 # get_caller_module_dict()
2755 #
2756 # This function returns a dictionary containing all of the symbols defined within
2757 # a caller further down the call stack.  This is used to get the environment
2758 # associated with the yacc() call if none was provided.
2759 # -----------------------------------------------------------------------------
2760
2761 def get_caller_module_dict(levels):
2762     try:
2763         raise RuntimeError
2764     except RuntimeError:
2765         e,b,t = sys.exc_info()
2766         f = t.tb_frame
2767         while levels > 0:
2768             f = f.f_back
2769             levels -= 1
2770         ldict = f.f_globals.copy()
2771         if f.f_globals != f.f_locals:
2772             ldict.update(f.f_locals)
2773
2774         return ldict
2775
2776 # -----------------------------------------------------------------------------
2777 # parse_grammar()
2778 #
2779 # This takes a raw grammar rule string and parses it into production data
2780 # -----------------------------------------------------------------------------
2781 def parse_grammar(doc,file,line):
2782     grammar = []
2783     # Split the doc string into lines
2784     pstrings = doc.splitlines()
2785     lastp = None
2786     dline = line
2787     for ps in pstrings:
2788         dline += 1
2789         p = ps.split()
2790         if not p: continue
2791         try:
2792             if p[0] == '|':
2793                 # This is a continuation of a previous rule
2794                 if not lastp:
2795                     raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
2796                 prodname = lastp
2797                 syms = p[1:]
2798             else:
2799                 prodname = p[0]
2800                 lastp = prodname
2801                 syms   = p[2:]
2802                 assign = p[1]
2803                 if assign != ':' and assign != '::=':
2804                     raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
2805
2806             grammar.append((file,dline,prodname,syms))
2807         except SyntaxError:
2808             raise
2809         except Exception:
2810             raise SyntaxError("%s:%d: Syntax error in rule %r" % (file,dline,ps.strip()))
2811
2812     return grammar
2813
2814 # -----------------------------------------------------------------------------
2815 # ParserReflect()
2816 #
2817 # This class represents information extracted for building a parser including
2818 # start symbol, error function, tokens, precedence list, action functions,
2819 # etc.
2820 # -----------------------------------------------------------------------------
2821 class ParserReflect(object):
2822     def __init__(self,pdict,log=None):
2823         self.pdict      = pdict
2824         self.start      = None
2825         self.error_func = None
2826         self.tokens     = None
2827         self.modules    = {}
2828         self.grammar    = []
2829         self.error      = 0
2830
2831         if log is None:
2832             self.log = PlyLogger(sys.stderr)
2833         else:
2834             self.log = log
2835
2836     # Get all of the basic information
2837     def get_all(self):
2838         self.get_start()
2839         self.get_error_func()
2840         self.get_tokens()
2841         self.get_precedence()
2842         self.get_pfunctions()
2843
2844     # Validate all of the information
2845     def validate_all(self):
2846         self.validate_start()
2847         self.validate_error_func()
2848         self.validate_tokens()
2849         self.validate_precedence()
2850         self.validate_pfunctions()
2851         self.validate_modules()
2852         return self.error
2853
2854     # Compute a signature over the grammar
2855     def signature(self):
2856         try:
2857             from hashlib import md5
2858         except ImportError:
2859             from md5 import md5
2860         try:
2861             sig = md5()
2862             if self.start:
2863                 sig.update(self.start.encode('latin-1'))
2864             if self.prec:
2865                 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
2866             if self.tokens:
2867                 sig.update(" ".join(self.tokens).encode('latin-1'))
2868             for f in self.pfuncs:
2869                 if f[3]:
2870                     sig.update(f[3].encode('latin-1'))
2871         except (TypeError,ValueError):
2872             pass
2873         return sig.digest()
2874
2875     # -----------------------------------------------------------------------------
2876     # validate_modules()
2877     #
2878     # This method checks to see if there are duplicated p_rulename() functions
2879     # in the parser module file.  Without this function, it is really easy for
2880     # users to make mistakes by cutting and pasting code fragments (and it's a real
2881     # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
2882     # we just do a little regular expression pattern matching of def statements
2883     # to try and detect duplicates.
2884     # -----------------------------------------------------------------------------
2885
2886     def validate_modules(self):
2887         # Match def p_funcname(
2888         fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2889
2890         for module in self.modules.keys():
2891             lines, linen = inspect.getsourcelines(module)
2892
2893             counthash = { }
2894             for linen,l in enumerate(lines):
2895                 linen += 1
2896                 m = fre.match(l)
2897                 if m:
2898                     name = m.group(1)
2899                     prev = counthash.get(name)
2900                     if not prev:
2901                         counthash[name] = linen
2902                     else:
2903                         filename = inspect.getsourcefile(module)
2904                         self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2905
2906     # Get the start symbol
2907     def get_start(self):
2908         self.start = self.pdict.get('start')
2909
2910     # Validate the start symbol
2911     def validate_start(self):
2912         if self.start is not None:
2913             if not isinstance(self.start, string_types):
2914                 self.log.error("'start' must be a string")
2915
2916     # Look for error handler
2917     def get_error_func(self):
2918         self.error_func = self.pdict.get('p_error')
2919
2920     # Validate the error function
2921     def validate_error_func(self):
2922         if self.error_func:
2923             if isinstance(self.error_func,types.FunctionType):
2924                 ismethod = 0
2925             elif isinstance(self.error_func, types.MethodType):
2926                 ismethod = 1
2927             else:
2928                 self.log.error("'p_error' defined, but is not a function or method")
2929                 self.error = 1
2930                 return
2931
2932             eline = func_code(self.error_func).co_firstlineno
2933             efile = func_code(self.error_func).co_filename
2934             module = inspect.getmodule(self.error_func)
2935             self.modules[module] = 1
2936
2937             argcount = func_code(self.error_func).co_argcount - ismethod
2938             if argcount != 1:
2939                 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
2940                 self.error = 1
2941
2942     # Get the tokens map
2943     def get_tokens(self):
2944         tokens = self.pdict.get("tokens")
2945         if not tokens:
2946             self.log.error("No token list is defined")
2947             self.error = 1
2948             return
2949
2950         if not isinstance(tokens,(list, tuple)):
2951             self.log.error("tokens must be a list or tuple")
2952             self.error = 1
2953             return
2954
2955         if not tokens:
2956             self.log.error("tokens is empty")
2957             self.error = 1
2958             return
2959
2960         self.tokens = tokens
2961
2962     # Validate the tokens
2963     def validate_tokens(self):
2964         # Validate the tokens.
2965         if 'error' in self.tokens:
2966             self.log.error("Illegal token name 'error'. Is a reserved word")
2967             self.error = 1
2968             return
2969
2970         terminals = {}
2971         for n in self.tokens:
2972             if n in terminals:
2973                 self.log.warning("Token %r multiply defined", n)
2974             terminals[n] = 1
2975
2976     # Get the precedence map (if any)
2977     def get_precedence(self):
2978         self.prec = self.pdict.get("precedence")
2979
2980     # Validate and parse the precedence map
2981     def validate_precedence(self):
2982         preclist = []
2983         if self.prec:
2984             if not isinstance(self.prec,(list,tuple)):
2985                 self.log.error("precedence must be a list or tuple")
2986                 self.error = 1
2987                 return
2988             for level,p in enumerate(self.prec):
2989                 if not isinstance(p,(list,tuple)):
2990                     self.log.error("Bad precedence table")
2991                     self.error = 1
2992                     return
2993
2994                 if len(p) < 2:
2995                     self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
2996                     self.error = 1
2997                     return
2998                 assoc = p[0]
2999                 if not isinstance(assoc, string_types):
3000                     self.log.error("precedence associativity must be a string")
3001                     self.error = 1
3002                     return
3003                 for term in p[1:]:
3004                     if not isinstance(term, string_types):
3005                         self.log.error("precedence items must be strings")
3006                         self.error = 1
3007                         return
3008                     preclist.append((term, assoc, level+1))
3009         self.preclist = preclist
3010
3011     # Get all p_functions from the grammar
3012     def get_pfunctions(self):
3013         p_functions = []
3014         for name, item in self.pdict.items():
3015             if not name.startswith('p_'): continue
3016             if name == 'p_error': continue
3017             if isinstance(item,(types.FunctionType,types.MethodType)):
3018                 line = func_code(item).co_firstlineno
3019                 module = inspect.getmodule(item)
3020                 p_functions.append((line,module,name,item.__doc__))
3021
3022         # Sort all of the actions by line number
3023         p_functions.sort()
3024         self.pfuncs = p_functions
3025
3026
3027     # Validate all of the p_functions
3028     def validate_pfunctions(self):
3029         grammar = []
3030         # Check for non-empty symbols
3031         if len(self.pfuncs) == 0:
3032             self.log.error("no rules of the form p_rulename are defined")
3033             self.error = 1
3034             return
3035
3036         for line, module, name, doc in self.pfuncs:
3037             file = inspect.getsourcefile(module)
3038             func = self.pdict[name]
3039             if isinstance(func, types.MethodType):
3040                 reqargs = 2
3041             else:
3042                 reqargs = 1
3043             if func_code(func).co_argcount > reqargs:
3044                 self.log.error("%s:%d: Rule %r has too many arguments",file,line,func.__name__)
3045                 self.error = 1
3046             elif func_code(func).co_argcount < reqargs:
3047                 self.log.error("%s:%d: Rule %r requires an argument",file,line,func.__name__)
3048                 self.error = 1
3049             elif not func.__doc__:
3050                 self.log.warning("%s:%d: No documentation string specified in function %r (ignored)",file,line,func.__name__)
3051             else:
3052                 try:
3053                     parsed_g = parse_grammar(doc,file,line)
3054                     for g in parsed_g:
3055                         grammar.append((name, g))
3056                 except SyntaxError:
3057                     e = sys.exc_info()[1]
3058                     self.log.error(str(e))
3059                     self.error = 1
3060
3061                 # Looks like a valid grammar rule
3062                 # Mark the file in which defined.
3063                 self.modules[module] = 1
3064
3065         # Secondary validation step that looks for p_ definitions that are not functions
3066         # or functions that look like they might be grammar rules.
3067
3068         for n,v in self.pdict.items():
3069             if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): continue
3070             if n.startswith('t_'): continue
3071             if n.startswith('p_') and n != 'p_error':
3072                 self.log.warning("%r not defined as a function", n)
3073             if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
3074                 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
3075                 try:
3076                     doc = v.__doc__.split(" ")
3077                     if doc[1] == ':':
3078                         self.log.warning("%s:%d: Possible grammar rule %r defined without p_ prefix",
3079                                          func_code(v).co_filename, func_code(v).co_firstlineno,n)
3080                 except Exception:
3081                     pass
3082
3083         self.grammar = grammar
3084
3085 # -----------------------------------------------------------------------------
3086 # yacc(module)
3087 #
3088 # Build a parser
3089 # -----------------------------------------------------------------------------
3090
3091 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3092          check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
3093          debuglog=None, errorlog = None, picklefile=None):
3094
3095     global parse                 # Reference to the parsing method of the last built parser
3096
3097     # If pickling is enabled, table files are not created
3098
3099     if picklefile:
3100         write_tables = 0
3101
3102     if errorlog is None:
3103         errorlog = PlyLogger(sys.stderr)
3104
3105     # Get the module dictionary used for the parser
3106     if module:
3107         _items = [(k,getattr(module,k)) for k in dir(module)]
3108         pdict = dict(_items)
3109     else:
3110         pdict = get_caller_module_dict(2)
3111
3112     # Collect parser information from the dictionary
3113     pinfo = ParserReflect(pdict,log=errorlog)
3114     pinfo.get_all()
3115
3116     if pinfo.error:
3117         raise YaccError("Unable to build parser")
3118
3119     # Check signature against table files (if any)
3120     signature = pinfo.signature()
3121
3122     # Read the tables
3123     try:
3124         lr = LRTable()
3125         if picklefile:
3126             read_signature = lr.read_pickle(picklefile)
3127         else:
3128             read_signature = lr.read_table(tabmodule)
3129         if optimize or (read_signature == signature):
3130             try:
3131                 lr.bind_callables(pinfo.pdict)
3132                 parser = LRParser(lr,pinfo.error_func)
3133                 parse = parser.parse
3134                 return parser
3135             except Exception:
3136                 e = sys.exc_info()[1]
3137                 errorlog.warning("There was a problem loading the table file: %s", repr(e))
3138     except VersionError:
3139         e = sys.exc_info()
3140         errorlog.warning(str(e))
3141     except Exception:
3142         pass
3143
3144     if debuglog is None:
3145         if debug:
3146             debuglog = PlyLogger(open(debugfile,"w"))
3147         else:
3148             debuglog = NullLogger()
3149
3150     debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3151
3152
3153     errors = 0
3154
3155     # Validate the parser information
3156     if pinfo.validate_all():
3157         raise YaccError("Unable to build parser")
3158
3159     if not pinfo.error_func:
3160         errorlog.warning("no p_error() function is defined")
3161
3162     # Create a grammar object
3163     grammar = Grammar(pinfo.tokens)
3164
3165     # Set precedence level for terminals
3166     for term, assoc, level in pinfo.preclist:
3167         try:
3168             grammar.set_precedence(term,assoc,level)
3169         except GrammarError:
3170             e = sys.exc_info()[1]
3171             errorlog.warning("%s",str(e))
3172
3173     # Add productions to the grammar
3174     for funcname, gram in pinfo.grammar:
3175         file, line, prodname, syms = gram
3176         try:
3177             grammar.add_production(prodname,syms,funcname,file,line)
3178         except GrammarError:
3179             e = sys.exc_info()[1]
3180             errorlog.error("%s",str(e))
3181             errors = 1
3182
3183     # Set the grammar start symbols
3184     try:
3185         if start is None:
3186             grammar.set_start(pinfo.start)
3187         else:
3188             grammar.set_start(start)
3189     except GrammarError:
3190         e = sys.exc_info()[1]
3191         errorlog.error(str(e))
3192         errors = 1
3193
3194     if errors:
3195         raise YaccError("Unable to build parser")
3196
3197     # Verify the grammar structure
3198     undefined_symbols = grammar.undefined_symbols()
3199     for sym, prod in undefined_symbols:
3200         errorlog.error("%s:%d: Symbol %r used, but not defined as a token or a rule",prod.file,prod.line,sym)
3201         errors = 1
3202
3203     unused_terminals = grammar.unused_terminals()
3204     if unused_terminals:
3205         debuglog.info("")
3206         debuglog.info("Unused terminals:")
3207         debuglog.info("")
3208         for term in unused_terminals:
3209             errorlog.warning("Token %r defined, but not used", term)
3210             debuglog.info("    %s", term)
3211
3212     # Print out all productions to the debug log
3213     if debug:
3214         debuglog.info("")
3215         debuglog.info("Grammar")
3216         debuglog.info("")
3217         for n,p in enumerate(grammar.Productions):
3218             debuglog.info("Rule %-5d %s", n, p)
3219
3220     # Find unused non-terminals
3221     unused_rules = grammar.unused_rules()
3222     for prod in unused_rules:
3223         errorlog.warning("%s:%d: Rule %r defined, but not used", prod.file, prod.line, prod.name)
3224
3225     if len(unused_terminals) == 1:
3226         errorlog.warning("There is 1 unused token")
3227     if len(unused_terminals) > 1:
3228         errorlog.warning("There are %d unused tokens", len(unused_terminals))
3229
3230     if len(unused_rules) == 1:
3231         errorlog.warning("There is 1 unused rule")
3232     if len(unused_rules) > 1:
3233         errorlog.warning("There are %d unused rules", len(unused_rules))
3234
3235     if debug:
3236         debuglog.info("")
3237         debuglog.info("Terminals, with rules where they appear")
3238         debuglog.info("")
3239         terms = list(grammar.Terminals)
3240         terms.sort()
3241         for term in terms:
3242             debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3243
3244         debuglog.info("")
3245         debuglog.info("Nonterminals, with rules where they appear")
3246         debuglog.info("")
3247         nonterms = list(grammar.Nonterminals)
3248         nonterms.sort()
3249         for nonterm in nonterms:
3250             debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3251         debuglog.info("")
3252
3253     if check_recursion:
3254         unreachable = grammar.find_unreachable()
3255         for u in unreachable:
3256             errorlog.warning("Symbol %r is unreachable",u)
3257
3258         infinite = grammar.infinite_cycles()
3259         for inf in infinite:
3260             errorlog.error("Infinite recursion detected for symbol %r", inf)
3261             errors = 1
3262
3263     unused_prec = grammar.unused_precedence()
3264     for term, assoc in unused_prec:
3265         errorlog.error("Precedence rule %r defined for unknown symbol %r", assoc, term)
3266         errors = 1
3267
3268     if errors:
3269         raise YaccError("Unable to build parser")
3270
3271     # Run the LRGeneratedTable on the grammar
3272     if debug:
3273         errorlog.debug("Generating %s tables", method)
3274
3275     lr = LRGeneratedTable(grammar,method,debuglog)
3276
3277     if debug:
3278         num_sr = len(lr.sr_conflicts)
3279
3280         # Report shift/reduce and reduce/reduce conflicts
3281         if num_sr == 1:
3282             errorlog.warning("1 shift/reduce conflict")
3283         elif num_sr > 1:
3284             errorlog.warning("%d shift/reduce conflicts", num_sr)
3285
3286         num_rr = len(lr.rr_conflicts)
3287         if num_rr == 1:
3288             errorlog.warning("1 reduce/reduce conflict")
3289         elif num_rr > 1:
3290             errorlog.warning("%d reduce/reduce conflicts", num_rr)
3291
3292     # Write out conflicts to the output file
3293     if debug and (lr.sr_conflicts or lr.rr_conflicts):
3294         debuglog.warning("")
3295         debuglog.warning("Conflicts:")
3296         debuglog.warning("")
3297
3298         for state, tok, resolution in lr.sr_conflicts:
3299             debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s",  tok, state, resolution)
3300
3301         already_reported = {}
3302         for state, rule, rejected in lr.rr_conflicts:
3303             if (state,id(rule),id(rejected)) in already_reported:
3304                 continue
3305             debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3306             debuglog.warning("rejected rule (%s) in state %d", rejected,state)
3307             errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3308             errorlog.warning("rejected rule (%s) in state %d", rejected, state)
3309             already_reported[state,id(rule),id(rejected)] = 1
3310
3311         warned_never = []
3312         for state, rule, rejected in lr.rr_conflicts:
3313             if not rejected.reduced and (rejected not in warned_never):
3314                 debuglog.warning("Rule (%s) is never reduced", rejected)
3315                 errorlog.warning("Rule (%s) is never reduced", rejected)
3316                 warned_never.append(rejected)
3317
3318     # Write the table file if requested
3319     if write_tables:
3320         lr.write_table(tabmodule,outputdir,signature)
3321
3322     # Write a pickled version of the tables
3323     if picklefile:
3324         lr.pickle_table(picklefile,signature)
3325
3326     # Build the parser
3327     lr.bind_callables(pinfo.pdict)
3328     parser = LRParser(lr,pinfo.error_func)
3329
3330     parse = parser.parse
3331     return parser