# -----------------------------------------------------------------------------
# Copyright (C) 2001-2015,
# David M. Beazley (Dabeaz LLC)
# SPDX-License-Identifier: BSD-3-Clause
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings. The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman, "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style." Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import os.path
import warnings
__tabversion__ = '3.8'

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = True             # Debugging mode. If set, yacc generates a
                             # 'parser.out' file in the current directory

debug_file = 'parser.out'    # Default name of the debugging file
tab_module = 'parsetab'      # Default name of the table module
default_lr = 'LALR'          # Default LR table generation method

error_count = 3              # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False            # Set to True if developing yacc. This turns off optimized
                             # implementations of certain functions.

resultlimit = 40             # Size limit of results when running in debug mode.

pickle_protocol = 0          # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize         # Used as an 'infinity' marker by the digraph traversal below
# This object is a stand-in for a logging object created by the
# logging module. PLY will use this by default to create things
# such as the parser.out file. If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write((msg % args) + '\n')

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: ' + (msg % args) + '\n')

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: ' + (msg % args) + '\n')

    critical = debug
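
# A minimal usage sketch (assuming a yacc() entry point like PLY's, which
# accepts debuglog= and errorlog= arguments): route all of PLY's diagnostic
# output to stderr by passing in a PlyLogger instance:
#
#     import sys
#     log = PlyLogger(sys.stderr)
#     parser = yacc.yacc(debuglog=log, errorlog=log)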
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass
# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit] + ' ...'
    result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
    return result

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str:
        repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))
# Panic mode error recovery support. This feature is being reworked--much of the
# code here is to offer a deprecation/backwards compatible transition

_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    r = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        pass
    return r
#-----------------------------------------------------------------------------
#                        === LR Parsing Engine ===
#
# The following classes are used for the LR parser itself. These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------

# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)
# This class is a wrapper around the objects actually passed to each
# grammar rule. Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined). The linespan() method returns
# a tuple of (startline, endline) representing the range of lines
# for a symbol. The lexspan() method returns a tuple (lexpos, endlexpos)
# representing the range of positional information for a symbol.

class YaccProduction:
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [s.value for s in self.slice[n]]
        elif n >= 0:
            return self.slice[n].value
        else:
            return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        startline = getattr(self.slice[n], 'lineno', 0)
        endline = getattr(self.slice[n], 'endlineno', startline)
        return startline, endline

    def lexpos(self, n):
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        startpos = getattr(self.slice[n], 'lexpos', 0)
        endpos = getattr(self.slice[n], 'endlexpos', startpos)
        return startpos, endpos
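
# As an illustration (hypothetical rule and token names), the 'p' argument
# received by a grammar rule function is a YaccProduction; note that line
# information is only available if the lexer sets token line numbers:
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]        # index lookup/assignment reads/writes .value
#         span = p.linespan(1)      # (startline, endline) of the 'expr' symbol
#         pos = p.lexspan(3)        # (lexpos, endlexpos) of the 'term' symbol
#         line = p.lineno(2)        # line number of the PLUS token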
# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        self.errorok = True

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)
    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        self.defaulted_states = {}
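
    # As an illustration (hypothetical table data): if the action table were
    #
    #     self.action = {0: {'NUMBER': 1}, 1: {'PLUS': -2}}
    #
    # then state 1 has exactly one possible action and it is a reduction
    # (negative entries denote reductions), so set_defaulted_states() records
    # {1: -2} and the parser can reduce in state 1 without fetching the next
    # token first.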
    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
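
    # Typical call patterns (a usage sketch; 'data' and 'lexer' come from the
    # surrounding application):
    #
    #     result = parser.parse(data, lexer=lexer)                  # fastest path
    #     result = parser.parse(data, lexer=lexer, tracking=True)   # track position spans
    #     result = parser.parse(data, lexer=lexer, debug=True)      # log every parser step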
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse(). All changes made to the
    # parsing engine should be made here. Optimized versions of this function
    # are automatically created by the ply/ygen.py script. This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        debug.info('PLY: PARSE DEBUG START')
        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            debug.debug('State : %s', state)

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)

            debug.debug('Stack : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    debug.debug('Action : Shift and goto state %s', t)

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue
                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            debug.info('Result : %s', format_result(pslice[0]))
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    else:

                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            debug.info('Result : %s', format_result(pslice[0]))
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)

                    debug.info('Done : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')

                    return result

            if t is None:

                debug.error('Error : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return
                else:
                    errorcount = error_count
                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parsedebug-end
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script. Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery
        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue
                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    else:

                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result
            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return
                else:
                    errorcount = error_count
                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-end
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery
        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue
                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    else:

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set, enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result
            if t is None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return
                else:
                    errorcount = error_count
                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')
        #--! parseopt-notrack-end
# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production. For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    reduced = 0
    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction
        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
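
# As an illustration (hypothetical rule): a grammar function such as
#
#     def p_expr(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#
# is recorded as a Production with name='expr', prod=('expr', 'PLUS', 'term'),
# len=3, usyms=['expr', 'PLUS', 'term'], and func='p_expr'. bind() later
# resolves that function name back to the callable invoked on reduce.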
# This class serves as a minimal stand-in for Production objects when
# reading table data from files. It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.name = name
        self.len = len
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.str = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule. For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse. Here are
# the basic attributes:
#
#       name       - Name of the production. For example 'expr'
#       prod       - A list of symbols on the right side ['expr', '.', 'PLUS', 'term']
#       number     - Production number.
#       lr_next    - Next LR item. For example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------

class LRItem(object):
    def __init__(self, p, n):
        self.name = p.name
        self.prod = list(p.prod)
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        self.prod.insert(n, '.')
        self.prod = tuple(self.prod)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if self.prod:
            s = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            s = '%s -> <empty>' % self.name
        return s

    def __repr__(self):
        return 'LRItem(' + str(self) + ')'
# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols. Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError):
    pass

class Grammar(object):
    def __init__(self, terminals):
        self.Productions = [None]   # A list of all of the productions. The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames = {}         # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap = {}           # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals = {}         # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First = {}             # A dictionary of precomputed FIRST(x) symbols

        self.Follow = {}            # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence = {}        # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self, index):
        return self.Productions[index]
    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left', 'right', or 'nonassoc'. level is a numeric level.
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)
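
    # For example (a usage sketch with hypothetical token names), giving TIMES
    # a higher precedence level than PLUS:
    #
    #     g = Grammar(['PLUS', 'TIMES', 'NUMBER'])
    #     g.set_precedence('PLUS', 'left', 1)
    #     g.set_precedence('TIMES', 'left', 2)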
    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols. For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most non-terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if prodprec is None:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid. Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
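
    # For example (hypothetical symbols), a unary-minus rule can borrow the
    # precedence of a UMINUS pseudo-token instead of its rightmost terminal:
    #
    #     g.set_precedence('UMINUS', 'right', 3)
    #     g.add_production('expr', ['MINUS', 'expr', '%prec', 'UMINUS'],
    #                      func='p_expr_uminus')
    #
    # The trailing ['%prec', 'UMINUS'] pair is stripped from the rule and the
    # production's precedence becomes ('right', 3).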
    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar. Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start
    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol. Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]
    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p:
                continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    result.append((s, p))
        return result
    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar. Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok
    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------
    def unused_rules(self):
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod
    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term, precedence) corresponding to precedence
    # rules that were never used by the grammar. term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname, self.Precedence[termname][0]))

        return unused
    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first(), the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self, beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = False

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = True
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result
    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while True:
            some_change = False
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = True
            if not some_change:
                break

        return self.First
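
    # A small worked example (hypothetical grammar): with terminals NUMBER,
    # LPAREN, and RPAREN and the rules
    #
    #     factor : NUMBER
    #     factor : LPAREN expr RPAREN
    #     expr   : factor
    #
    # the fixed point yields FIRST(factor) = FIRST(expr) = ['NUMBER', 'LPAREN'],
    # since every derivation of either nonterminal must begin with one of those
    # two tokens.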
    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol. The
    # follow set is the set of all symbols that might follow a given
    # non-terminal. See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = ['$end']

        while True:
            didadd = False
            for p in self.Productions[1:]:
                # Here is the production set
                for i, B in enumerate(p.prod):
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = False
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
                            if f == '<empty>':
                                hasempty = True
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = True
            if not didadd:
                break
        return self.Follow
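
    # Continuing the hypothetical grammar above (with 'expr' as the start
    # symbol, so '$end' seeds FOLLOW(expr)): because 'expr' appears in
    # 'LPAREN expr RPAREN', RPAREN lands in FOLLOW(expr); and because
    # 'expr : factor' ends with 'factor', all of FOLLOW(expr) (here '$end'
    # and RPAREN) is copied into FOLLOW(factor).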
    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items. The LR items are stored in two ways: First, they are uniquely
    # numbered and placed in the list _lritems. Second, a linked list of LR items
    # is built for each production. For example:
    #
    #     E -> E PLUS E
    #
    # creates the list
    #
    #     [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while True:
                if i > p.len:
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here. They are defined
# in the derived class LRGeneratedTable.
# -----------------------------------------------------------------------------

class VersionError(YaccError):
    pass

class LRTable(object):
    def __init__(self):
        self.lr_action = None
        self.lr_goto = None
        self.lr_productions = None
        self.lr_method = None

    def read_table(self, module):
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            exec('import %s' % module)
            parsetab = sys.modules[module]

        if parsetab._tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature
    def read_pickle(self, filename):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        if not os.path.exists(filename):
            raise ImportError

        in_f = open(filename, 'rb')

        tabversion = pickle.load(in_f)
        if tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')
        self.lr_method = pickle.load(in_f)
        signature = pickle.load(in_f)
        self.lr_action = pickle.load(in_f)
        self.lr_goto = pickle.load(in_f)
        productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        in_f.close()
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)
# -----------------------------------------------------------------------------
#                           === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------
def digraph(X, R, FP):
    N = {}
    for x in X:
        N[x] = 0
    stack = []
    F = {}
    for x in X:
        if N[x] == 0:
            traverse(x, N, stack, F, X, R, FP)
    return F

def traverse(x, N, stack, F, X, R, FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
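
# A tiny usage sketch (toy data, not from the generator): compute
# F(x) = F'(x) U F(y) for all y with x R y, over X = {1, 2}:
#
#     X = [1, 2]
#     R = lambda x: [2] if x == 1 else []   # the only relation is 1 R 2
#     FP = lambda x: [x * 10]               # F'(1) = [10], F'(2) = [20]
#     digraph(X, R, FP)                     # returns {1: [10, 20], 2: [20]}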
class LALRError(YaccError):
    pass

# -----------------------------------------------------------------------------
#                             == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm. There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self, grammar, method='LALR', log=None):
        if method not in ['SLR', 'LALR']:
            raise LALRError('Unsupported method %s' % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}          # Action table
        self.lr_goto = {}            # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}      # Cache of computed gotos
        self.lr0_cidhash = {}        # Cache of closures

        self._add_count = 0          # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []          # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self, I):
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = True
        while didadd:
            didadd = False
            for j in J:
                for x in j.lr_after:
                    if getattr(x, 'lr0_added', 0) == self._add_count:
                        continue
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = True

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol. This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects). With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self, I, x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g
    # Compute the LR(0) sets of item function
    def lr0_items(self):
        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbol
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = {}
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I, x)
                if not g or id(g) in self.lr0_cidhash:
                    continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C
    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    # Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    #  McGraw-Hill Book Company, (1985).
    # -----------------------------------------------------------------------------
    # -----------------------------------------------------------------------------
    # compute_nullable_nonterminals()
    #
    # Creates a dictionary containing all of the non-terminals that might produce
    # an empty production.
    # -----------------------------------------------------------------------------

    def compute_nullable_nonterminals(self):
        nullable = set()
        num_nullable = 0
        while True:
            for p in self.grammar.Productions[1:]:
                if p.len == 0:
                    nullable.add(p.name)
                    continue
                for t in p.prod:
                    if t not in nullable:
                        break
                else:
                    nullable.add(p.name)
            if len(nullable) == num_nullable:
                break
            num_nullable = len(nullable)
        return nullable
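
    # For instance (hypothetical rules), given
    #
    #     opt_sign : PLUS
    #     opt_sign : <empty>
    #     number   : opt_sign DIGITS
    #
    # the first pass adds 'opt_sign' (it has a zero-length production), while
    # 'number' is never added because DIGITS is a terminal and can never
    # become nullable.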
    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions. These are transitions in which a dot appears immediately before
    # a non-terminal. Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------

    def find_nonterminal_transitions(self, C):
        trans = []
        for stateno, state in enumerate(C):
            for p in state:
                if p.lr_index < p.len - 1:
                    t = (stateno, p.prod[p.lr_index+1])
                    if t[1] in self.grammar.Nonterminals:
                        if t not in trans:
                            trans.append(t)
        return trans
    # -----------------------------------------------------------------------------
    # dr_relation()
    #
    # Computes the DR(p,A) relationships for non-terminal transitions. The input
    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
    #
    # Returns a list of terminals.
    # -----------------------------------------------------------------------------

    def dr_relation(self, C, trans, nullable):
        state, N = trans
        terms = []

        g = self.lr0_goto(C[state], N)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index+1]
                if a in self.grammar.Terminals:
                    if a not in terms:
                        terms.append(a)

        # This extra bit is to handle the start state
        if state == 0 and N == self.grammar.Productions[0].prod[0]:
            terms.append('$end')

        return terms
2281 # -----------------------------------------------------------------------------
2282 # reads_relation()
2284 # Computes the READS() relation (p,A) READS (t,C).
2285 # -----------------------------------------------------------------------------
2287 def reads_relation(self, C, trans, empty):
2288 # Look for empty transitions
2292 g = self.lr0_goto(C[state], N)
2293 j = self.lr0_cidhash.get(id(g), -1)
2295 if p.lr_index < p.len - 1:
2296 a = p.prod[p.lr_index + 1]
2302 # -----------------------------------------------------------------------------
2303 # compute_lookback_includes()
2305 # Determines the lookback and includes relations
2309 # This relation is determined by running the LR(0) state machine forward.
2310 # For example, starting with a production "N : . A B C", we run it forward
2311 # to obtain "N : A B C ." We then build a relationship between this final
2312 # state and the starting state. These relationships are stored in the dictionary lookdict.
2317 # Computes the INCLUDES() relation (p,A) INCLUDES (p',B).
2319 # This relation is used to determine non-terminal transitions that occur
2320 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2321 # if the following holds:
2323 # B -> LAT, where T -> epsilon and p' -L-> p
2325 # L is essentially a prefix (which may be empty), T is a suffix that must be
2326 # able to derive an empty string. State p' must lead to state p with the string L.
2328 # -----------------------------------------------------------------------------
2330 def compute_lookback_includes(self, C, trans, nullable):
2331 lookdict = {} # Dictionary of lookback relations
2332 includedict = {} # Dictionary of include relations
2334 # Make a dictionary of non-terminal transitions
2339 # Loop over all transitions and compute lookbacks and includes
2340 for state, N in trans:
2347 # Okay, we have a name match. We now follow the production all the way
2348 # through the state machine until the dot reaches the right-hand end
2350 lr_index = p.lr_index
2352 while lr_index < p.len - 1:
2353 lr_index = lr_index + 1
2354 t = p.prod[lr_index]
2356 # Check to see if this symbol and state are a non-terminal transition
2357 if (j, t) in dtrans:
2358 # Yes. Okay, there is some chance that this is an includes
2359 # relation. The only way to know for certain is to check whether
2360 # the rest of the production derives empty (see the sketch after this function)
2364 if p.prod[li] in self.grammar.Terminals:
2365 break # No, forget it
2366 if p.prod[li] not in nullable:
2370 # Appears to be a relation between (j,t) and (state,N)
2371 includes.append((j, t))
2373 g = self.lr0_goto(C[j], t) # Go to next set
2374 j = self.lr0_cidhash.get(id(g), -1) # Go to next state
2376 # When we get here, j is the final state; now we have to locate the production
2378 if r.name != p.name:
2383 # This loop is comparing a production ". A B C" with "A B C ."
2384 while i < r.lr_index:
2385 if r.prod[i] != p.prod[i+1]:
2389 lookb.append((j, r))
2390 for i in includes:
2391     if i not in includedict:
2392         includedict[i] = []
2393     includedict[i].append((state, N))
2394 lookdict[(state, N)] = lookb
2396 return lookdict, includedict
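# The "rest of the production derives empty" test used above is the T in
# the INCLUDES condition B -> LAT described in the banner comment. A
# self-contained sketch of that test (hypothetical helper, not PLY API):
def _suffix_derives_empty(rhs, pos, nullable, terminals):
    # True if every symbol after rhs[pos] can derive the empty string.
    # Any terminal, or any non-nullable nonterminal, kills the relation.
    for sym in rhs[pos + 1:]:
        if sym in terminals or sym not in nullable:
            return False
    return True
# _suffix_derives_empty(['L', 'A', 'T'], 1, {'T'}, set()) -> True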
2398 # -----------------------------------------------------------------------------
2399 # compute_read_sets()
2401 # Given a set of LR(0) items, this function computes the read sets.
2403 # Inputs: C = Set of LR(0) items
2404 # ntrans = Set of nonterminal transitions
2405 # nullable = Set of nullable non-terminals
2407 # Returns a dictionary containing the read sets, keyed by transition
2408 # -----------------------------------------------------------------------------
2410 def compute_read_sets(self, C, ntrans, nullable):
2411 FP = lambda x: self.dr_relation(C, x, nullable)
2412 R = lambda x: self.reads_relation(C, x, nullable)
2413 F = digraph(ntrans, R, FP)
2416 # -----------------------------------------------------------------------------
2417 # compute_follow_sets()
2419 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2420 # and an include set, this function computes the follow sets
2422 # Follow(p,A) = Read(p,A) U union{ Follow(p',B) | (p,A) INCLUDES (p',B) }
2425 # ntrans = Set of nonterminal transitions
2426 # readsets = Readset (previously computed)
2427 # inclsets = Include sets (previously computed)
2429 # Returns a dictionary containing the follow sets, keyed by transition
2430 # -----------------------------------------------------------------------------
2432 def compute_follow_sets(self, ntrans, readsets, inclsets):
2433 FP = lambda x: readsets[x]
2434 R = lambda x: inclsets.get(x, [])
2435 F = digraph(ntrans, R, FP)
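# The digraph() call above is just an efficient way of solving the
# Follow equation. For intuition, a naive fixed-point version of the
# same computation (illustrative only, and much slower on big grammars):
def _follow_fixpoint_sketch(ntrans, readsets, inclsets):
    follow = dict((t, list(readsets[t])) for t in ntrans)
    changed = True
    while changed:
        changed = False
        for t in ntrans:
            # Absorb the follow sets of every transition related to t
            for t2 in inclsets.get(t, []):
                for a in follow[t2]:
                    if a not in follow[t]:
                        follow[t].append(a)
                        changed = True
    return follow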
2438 # -----------------------------------------------------------------------------
2439 # add_lookaheads()
2441 # Attaches the lookahead symbols to grammar rules.
2443 # Inputs: lookbacks - Set of lookback relations
2444 # followset - Computed follow set
2446 # This function directly attaches the lookaheads to productions contained
2447 # in the lookbacks set
2448 # -----------------------------------------------------------------------------
2450 def add_lookaheads(self, lookbacks, followset):
2451 for trans, lb in lookbacks.items():
2452 # Loop over productions in lookback
2454 if state not in p.lookaheads:
2455 p.lookaheads[state] = []
2456 f = followset.get(trans, [])
2458 if a not in p.lookaheads[state]:
2459 p.lookaheads[state].append(a)
2461 # -----------------------------------------------------------------------------
2462 # add_lalr_lookaheads()
2464 # This function does all of the work of adding lookahead information for use
2465 # with LALR parsing
2466 # -----------------------------------------------------------------------------
2468 def add_lalr_lookaheads(self, C):
2469 # Determine all of the nullable nonterminals
2470 nullable = self.compute_nullable_nonterminals()
2472 # Find all non-terminal transitions
2473 trans = self.find_nonterminal_transitions(C)
2476 readsets = self.compute_read_sets(C, trans, nullable)
2478 # Compute lookback/includes relations
2479 lookd, included = self.compute_lookback_includes(C, trans, nullable)
2481 # Compute LALR FOLLOW sets
2482 followsets = self.compute_follow_sets(trans, readsets, included)
2484 # Add all of the lookaheads
2485 self.add_lookaheads(lookd, followsets)
2487 # -----------------------------------------------------------------------------
2488 # lr_parse_table()
2490 # This function constructs the parse tables for SLR or LALR
2491 # -----------------------------------------------------------------------------
2492 def lr_parse_table(self):
2493 Productions = self.grammar.Productions
2494 Precedence = self.grammar.Precedence
2495 goto = self.lr_goto # Goto array
2496 action = self.lr_action # Action array
2497 log = self.log # Logger for output
2499 actionp = {} # Action production array (temporary)
2501 log.info('Parsing method: %s', self.lr_method)
2503 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2504 # This determines the number of states
2506 C = self.lr0_items()
2508 if self.lr_method == 'LALR':
2509 self.add_lalr_lookaheads(C)
2511 # Build the parser table, state by state
2514 # Loop over each production in I
2515 actlist = [] # List of actions
2520 log.info('state %d', st)
2523 log.info(' (%d) %s', p.number, p)
2527 if p.len == p.lr_index + 1:
2529 # Start symbol. Accept!
2530 st_action['$end'] = 0
2531 st_actionp['$end'] = p
2533 # We are at the end of a production. Reduce!
2534 if self.lr_method == 'LALR':
2535 laheads = p.lookaheads[st]
2537 laheads = self.grammar.Follow[p.name]
2539 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
2540 r = st_action.get(a)
2542 # Whoa. Have a shift/reduce or reduce/reduce conflict
2544 # Need to decide on shift or reduce here
2545 # By default we favor shifting. Need to add
2546 # some precedence rules here.
2547 sprec, slevel = Productions[st_actionp[a].number].prec
2548 rprec, rlevel = Precedence.get(a, ('right', 0))
2549 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2550 # We really need to reduce here.
2551 st_action[a] = -p.number
2553 if not slevel and not rlevel:
2554 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2555 self.sr_conflicts.append((st, a, 'reduce'))
2556 Productions[p.number].reduced += 1
2557 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2560 # Hmmm. Guess we'll keep the shift
2562 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2563 self.sr_conflicts.append((st, a, 'shift'))
2565 # Reduce/reduce conflict. In this case, we favor the rule
2566 # that was defined first in the grammar file
2567 oldp = Productions[-r]
2568 pp = Productions[p.number]
2569 if oldp.line > pp.line:
2570 st_action[a] = -p.number
2572 chosenp, rejectp = pp, oldp
2573 Productions[p.number].reduced += 1
2574 Productions[oldp.number].reduced -= 1
2576 chosenp, rejectp = oldp, pp
2577 self.rr_conflicts.append((st, chosenp, rejectp))
2578 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
2579 a, st_actionp[a].number, st_actionp[a])
2581 raise LALRError('Unknown conflict in state %d' % st)
2583 st_action[a] = -p.number
2585 Productions[p.number].reduced += 1
2588 a = p.prod[i+1] # Get symbol right after the "."
2589 if a in self.grammar.Terminals:
2590 g = self.lr0_goto(I, a)
2591 j = self.lr0_cidhash.get(id(g), -1)
2593 # We are in a shift state
2594 actlist.append((a, p, 'shift and go to state %d' % j))
2595 r = st_action.get(a)
2597 # Whoa. Have a shift/reduce or shift/shift conflict
2600 raise LALRError('Shift/shift conflict in state %d' % st)
2602 # Do a precedence check.
2603 # - if precedence of reduce rule is higher, we reduce.
2604 # - if precedence of reduce is same and left assoc, we reduce.
2605 # - otherwise we shift
2606 rprec, rlevel = Productions[st_actionp[a].number].prec
2607 sprec, slevel = Precedence.get(a, ('right', 0))
2608 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2609 # We decide to shift here... highest precedence to shift
2610 Productions[st_actionp[a].number].reduced -= 1
2614 log.info(' ! shift/reduce conflict for %s resolved as shift', a)
2615 self.sr_conflicts.append((st, a, 'shift'))
2616 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2619 # Hmmm. Guess we'll keep the reduce
2620 if not slevel and not rlevel:
2621 log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
2622 self.sr_conflicts.append((st, a, 'reduce'))
2625 raise LALRError('Unknown conflict in state %d' % st)
2630 # Print the actions associated with each terminal
2632 for a, p, m in actlist:
2634 if p is st_actionp[a]:
2635 log.info(' %-15s %s', a, m)
2636 _actprint[(a, m)] = 1
2638 # Print the actions that were not used. (debugging)
2640 for a, p, m in actlist:
2642 if p is not st_actionp[a]:
2643 if (a, m) not in _actprint:
2644 log.debug(' ! %-15s [ %s ]', a, m)
2646 _actprint[(a, m)] = 1
2650 # Construct the goto table for this state
2655 if s in self.grammar.Nonterminals:
2658 g = self.lr0_goto(I, n)
2659 j = self.lr0_cidhash.get(id(g), -1)
2662 log.info(' %-30s shift and go to state %d', n, j)
2664 action[st] = st_action
2665 actionp[st] = st_actionp
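# The integer encoding used in st_action above is worth spelling out:
# a positive entry means "shift and go to that state", a negative entry
# means "reduce by that rule number", and 0 means "accept". A tiny
# decoder for a single table entry (illustrative helper, not PLY API):
def _describe_action(entry):
    if entry is None:
        return 'error'
    if entry > 0:
        return 'shift and go to state %d' % entry
    if entry < 0:
        return 'reduce using rule %d' % -entry
    return 'accept'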
2669 # -----------------------------------------------------------------------------
2670 # write_table()
2672 # This function writes the LR parsing tables to a file
2673 # -----------------------------------------------------------------------------
2675 def write_table(self, tabmodule, outputdir='', signature=''):
2676 if isinstance(tabmodule, types.ModuleType):
2677 raise IOError("Won't overwrite existing tabmodule")
2679 basemodulename = tabmodule.split('.')[-1]
2680 filename = os.path.join(outputdir, basemodulename) + '.py'
2682 f = open(filename, 'w')
2686 # This file is automatically generated. Do not edit.
2692 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2694 # Change smaller to 0 to go back to original tables
2697 # Factor out names to try and make the table smaller
2701 for s, nd in self.lr_action.items():
2702 for name, v in nd.items():
2710 f.write('\n_lr_action_items = {')
2711 for k, v in items.items():
2712 f.write('%r:([' % k)
2724 for _k, _v in _lr_action_items.items():
2725 for _x,_y in zip(_v[0],_v[1]):
2726 if _x not in _lr_action: _lr_action[_x] = {}
2727 _lr_action[_x][_k] = _y
2728 del _lr_action_items
2732 f.write('\n_lr_action = { ')
2733 for k, v in self.lr_action.items():
2734 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2738 # Factor out names to try and make the table smaller
2741 for s, nd in self.lr_goto.items():
2742 for name, v in nd.items():
2750 f.write('\n_lr_goto_items = {')
2751 for k, v in items.items():
2752 f.write('%r:([' % k)
2764 for _k, _v in _lr_goto_items.items():
2765 for _x, _y in zip(_v[0], _v[1]):
2766 if _x not in _lr_goto: _lr_goto[_x] = {}
2767 _lr_goto[_x][_k] = _y
2771 f.write('\n_lr_goto = { ')
2772 for k, v in self.lr_goto.items():
2773 f.write('(%r,%r):%r,' % (k[0], k[1], v))
2776 # Write production table
2777 f.write('_lr_productions = [\n')
2778 for p in self.lr_productions:
2780 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2781 p.func, os.path.basename(p.file), p.line))
2783 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2787 except IOError as e:
2791 # -----------------------------------------------------------------------------
2792 # pickle_table()
2794 # This function pickles the LR parsing tables to the file named by filename
2795 # -----------------------------------------------------------------------------
2797 def pickle_table(self, filename, signature=''):
2799 import cPickle as pickle
2802 with open(filename, 'wb') as outf:
2803 pickle.dump(__tabversion__, outf, pickle_protocol)
2804 pickle.dump(self.lr_method, outf, pickle_protocol)
2805 pickle.dump(signature, outf, pickle_protocol)
2806 pickle.dump(self.lr_action, outf, pickle_protocol)
2807 pickle.dump(self.lr_goto, outf, pickle_protocol)
2810 for p in self.lr_productions:
2812 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2814 outp.append((str(p), p.name, p.len, None, None, None))
2815 pickle.dump(outp, outf, pickle_protocol)
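# Reading the tables back simply reverses the dump order above; PLY does
# this in read_pickle(), used by yacc() below. A minimal sketch of the
# counterpart (illustrative, with error handling omitted):
def _read_pickle_sketch(filename):
    import pickle
    with open(filename, 'rb') as inf:
        tabversion = pickle.load(inf)    # written from __tabversion__
        lr_method = pickle.load(inf)
        signature = pickle.load(inf)
        lr_action = pickle.load(inf)
        lr_goto = pickle.load(inf)
        productions = pickle.load(inf)
    return tabversion, lr_method, signature, lr_action, lr_goto, productions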
2817 # -----------------------------------------------------------------------------
2818 # === INTROSPECTION ===
2820 # The following functions and classes are used to implement the PLY
2821 # introspection features, followed by the yacc() function itself.
2822 # -----------------------------------------------------------------------------
2824 # -----------------------------------------------------------------------------
2825 # get_caller_module_dict()
2827 # This function returns a dictionary containing all of the symbols defined within
2828 # a caller further down the call stack. This is used to get the environment
2829 # associated with the yacc() call if none was provided.
2830 # -----------------------------------------------------------------------------
2832 def get_caller_module_dict(levels):
2833 f = sys._getframe(levels)
2834 ldict = f.f_globals.copy()
2835 if f.f_globals != f.f_locals:
2836 ldict.update(f.f_locals)
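# For example (illustrative), a library entry point can use this helper
# to pick up definitions from whoever called it. yacc() below does
# exactly this with levels=2: one level skips this helper's own frame,
# the other skips yacc()'s frame, landing in the user's module.
def _caller_env_demo():
    # Collect p_* names defined by the caller of this function.
    env = get_caller_module_dict(2)
    return sorted(k for k in env if k.startswith('p_'))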
2839 # -----------------------------------------------------------------------------
2840 # parse_grammar()
2842 # This takes a raw grammar rule string and parses it into production data
2843 # -----------------------------------------------------------------------------
2844 def parse_grammar(doc, file, line):
2846 # Split the doc string into lines
2847 pstrings = doc.splitlines()
2857 # This is a continuation of a previous rule
2859 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
2867 if assign != ':' and assign != '::=':
2868 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
2870 grammar.append((file, dline, prodname, syms))
2874 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
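# Example (the file name and line number below are made up): a docstring
# such as
#
#     expression : expression PLUS term
#                | term
#
# parses into one (file, line, prodname, syms) tuple per alternative:
def _parse_grammar_demo():
    doc = ('expression : expression PLUS term\n'
           '           | term')
    return parse_grammar(doc, 'calc.py', 12)
    # yields tuples of the form:
    # [('calc.py', 13, 'expression', ['expression', 'PLUS', 'term']),
    #  ('calc.py', 14, 'expression', ['term'])]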
2878 # -----------------------------------------------------------------------------
2879 # ParserReflect()
2881 # This class represents information extracted for building a parser including
2882 # start symbol, error function, tokens, precedence list, action functions, etc.
2884 # -----------------------------------------------------------------------------
2885 class ParserReflect(object):
2886 def __init__(self, pdict, log=None):
2889 self.error_func = None
2891 self.modules = set()
2896 self.log = PlyLogger(sys.stderr)
2900 # Get all of the basic information
2903 self.get_error_func()
2905 self.get_precedence()
2906 self.get_pfunctions()
2908 # Validate all of the information
2909 def validate_all(self):
2910 self.validate_start()
2911 self.validate_error_func()
2912 self.validate_tokens()
2913 self.validate_precedence()
2914 self.validate_pfunctions()
2915 self.validate_modules()
2918 # Compute a signature over the grammar
2919 def signature(self):
2921 from hashlib import md5
2927 sig.update(self.start.encode('latin-1'))
2929 sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1'))
2931 sig.update(' '.join(self.tokens).encode('latin-1'))
2932 for f in self.pfuncs:
2934 sig.update(f[3].encode('latin-1'))
2935 except (TypeError, ValueError):
2938 digest = base64.b16encode(sig.digest())
2939 if sys.version_info[0] >= 3:
2940 digest = digest.decode('latin-1')
2943 # -----------------------------------------------------------------------------
2944 # validate_modules()
2946 # This method checks to see if there are duplicated p_rulename() functions
2947 # in the parser module file. Without this function, it is really easy for
2948 # users to make mistakes by cutting and pasting code fragments (and it's a real
2949 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2950 # we just do a little regular expression pattern matching of def statements
2951 # to try and detect duplicates.
2952 # -----------------------------------------------------------------------------
2954 def validate_modules(self):
2955 # Match def p_funcname(
2956 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2958 for module in self.modules:
2959 lines, linen = inspect.getsourcelines(module)
2962 for linen, line in enumerate(lines):
2967 prev = counthash.get(name)
2969 counthash[name] = linen
2971 filename = inspect.getsourcefile(module)
2972 self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
2973 filename, linen, name, prev)
2975 # Get the start symbol
2976 def get_start(self):
2977 self.start = self.pdict.get('start')
2979 # Validate the start symbol
2980 def validate_start(self):
2981 if self.start is not None:
2982 if not isinstance(self.start, string_types):
2983 self.log.error("'start' must be a string")
2985 # Look for error handler
2986 def get_error_func(self):
2987 self.error_func = self.pdict.get('p_error')
2989 # Validate the error function
2990 def validate_error_func(self):
2992 if isinstance(self.error_func, types.FunctionType):
2994 elif isinstance(self.error_func, types.MethodType):
2997 self.log.error("'p_error' defined, but is not a function or method")
3001 eline = self.error_func.__code__.co_firstlineno
3002 efile = self.error_func.__code__.co_filename
3003 module = inspect.getmodule(self.error_func)
3004 self.modules.add(module)
3006 argcount = self.error_func.__code__.co_argcount - ismethod
3008 self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3011 # Get the tokens map
3012 def get_tokens(self):
3013 tokens = self.pdict.get('tokens')
3015 self.log.error('No token list is defined')
3019 if not isinstance(tokens, (list, tuple)):
3020 self.log.error('tokens must be a list or tuple')
3025 self.log.error('tokens is empty')
3029 self.tokens = tokens
3031 # Validate the tokens
3032 def validate_tokens(self):
3033 # Validate the tokens.
3034 if 'error' in self.tokens:
3035 self.log.error("Illegal token name 'error'. It is a reserved word")
3040 for n in self.tokens:
3042 self.log.warning('Token %r multiply defined', n)
3045 # Get the precedence map (if any)
3046 def get_precedence(self):
3047 self.prec = self.pdict.get('precedence')
3049 # Validate and parse the precedence map
3050 def validate_precedence(self):
3053 if not isinstance(self.prec, (list, tuple)):
3054 self.log.error('precedence must be a list or tuple')
3057 for level, p in enumerate(self.prec):
3058 if not isinstance(p, (list, tuple)):
3059 self.log.error('Bad precedence table')
3064 self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3068 if not isinstance(assoc, string_types):
3069 self.log.error('precedence associativity must be a string')
3073 if not isinstance(term, string_types):
3074 self.log.error('precedence items must be strings')
3077 preclist.append((term, assoc, level+1))
3078 self.preclist = preclist
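# For example (hypothetical table), given
#
#     precedence = (('left',  'PLUS',  'MINUS'),
#                   ('left',  'TIMES', 'DIVIDE'),
#                   ('right', 'UMINUS'))
#
# the loop above produces preclist entries of the form
#
#     [('PLUS', 'left', 1), ('MINUS', 'left', 1),
#      ('TIMES', 'left', 2), ('DIVIDE', 'left', 2),
#      ('UMINUS', 'right', 3)]
#
# where a higher level number means tighter binding.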
3080 # Get all p_functions from the grammar
3081 def get_pfunctions(self):
3083 for name, item in self.pdict.items():
3084 if not name.startswith('p_') or name == 'p_error':
3086 if isinstance(item, (types.FunctionType, types.MethodType)):
3087 line = item.__code__.co_firstlineno
3088 module = inspect.getmodule(item)
3089 p_functions.append((line, module, name, item.__doc__))
3091 # Sort all of the actions by line number; make sure to stringify
3092 # modules to make them sortable, since `line` may not uniquely sort all of them
3094 p_functions.sort(key=lambda p_function: (
3099 self.pfuncs = p_functions
3101 # Validate all of the p_functions
3102 def validate_pfunctions(self):
3104 # Check that at least one grammar rule was defined
3105 if len(self.pfuncs) == 0:
3106 self.log.error('no rules of the form p_rulename are defined')
3110 for line, module, name, doc in self.pfuncs:
3111 file = inspect.getsourcefile(module)
3112 func = self.pdict[name]
3113 if isinstance(func, types.MethodType):
3117 if func.__code__.co_argcount > reqargs:
3118 self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3120 elif func.__code__.co_argcount < reqargs:
3121 self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3123 elif not func.__doc__:
3124 self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3125 file, line, func.__name__)
3128 parsed_g = parse_grammar(doc, file, line)
3129 for g in parsed_g:
3130     grammar.append((name, g))
3131 except SyntaxError as e:
3132 self.log.error(str(e))
3135 # Looks like a valid grammar rule
3136 # Mark the module in which it is defined
3137 self.modules.add(module)
3139 # Secondary validation step that looks for p_ definitions that are not functions
3140 # or functions that look like they might be grammar rules.
3142 for n, v in self.pdict.items():
3143 if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3145 if n.startswith('t_'):
3147 if n.startswith('p_') and n != 'p_error':
3148 self.log.warning('%r not defined as a function', n)
3149 if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3150 (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3153 doc = v.__doc__.split(' ')
3155 self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3156 v.__code__.co_filename, v.__code__.co_firstlineno, n)
3160 self.grammar = grammar
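# For reference, the shape that validate_pfunctions() accepts: a p_*
# function whose docstring carries the BNF. The rule below is the
# canonical illustrative example, not part of this module:
def _pfunction_shape_demo():
    def p_expression_plus(p):
        'expression : expression PLUS term'
        p[0] = p[1] + p[3]
    # One positional argument, a p_ name, and a docstring holding the
    # grammar rule: this passes every check above.
    return p_expression_plus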
3162 # -----------------------------------------------------------------------------
3163 # yacc()
3165 # Build a parser
3166 # -----------------------------------------------------------------------------
3168 def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3169 check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3170 outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3172 if tabmodule is None:
3173 tabmodule = tab_module
3175 # Reference to the parsing method of the last built parser
3178 # If pickling is enabled, table files are not created
3182 if errorlog is None:
3183 errorlog = PlyLogger(sys.stderr)
3185 # Get the module dictionary used for the parser
3187 _items = [(k, getattr(module, k)) for k in dir(module)]
3188 pdict = dict(_items)
3189 # If no __file__ attribute is available, try to obtain it from the __module__ instead
3190 if '__file__' not in pdict:
3191 pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3193 pdict = get_caller_module_dict(2)
3195 if outputdir is None:
3196 # If no output directory is set, the location of the output files
3197 # is determined according to the following rules:
3198 # - If tabmodule specifies a package, files go into that package directory
3199 # - Otherwise, files go in the same directory as the specifying module
3200 if isinstance(tabmodule, types.ModuleType):
3201 srcfile = tabmodule.__file__
3203 if '.' not in tabmodule:
3204 srcfile = pdict['__file__']
3206 parts = tabmodule.split('.')
3207 pkgname = '.'.join(parts[:-1])
3208 exec('import %s' % pkgname)
3209 srcfile = getattr(sys.modules[pkgname], '__file__', '')
3210 outputdir = os.path.dirname(srcfile)
3212 # Determine whether the module providing the parser is part of a package.
3213 # If so, fix the tabmodule setting so that tables load correctly
3214 pkg = pdict.get('__package__')
3215 if pkg and isinstance(tabmodule, str):
3216 if '.' not in tabmodule:
3217 tabmodule = pkg + '.' + tabmodule
3221 # Set start symbol if it's specified directly using an argument
3222 if start is not None:
3223 pdict['start'] = start
3225 # Collect parser information from the dictionary
3226 pinfo = ParserReflect(pdict, log=errorlog)
3230 raise YaccError('Unable to build parser')
3232 # Check signature against table files (if any)
3233 signature = pinfo.signature()
3239 read_signature = lr.read_pickle(picklefile)
3241 read_signature = lr.read_table(tabmodule)
3242 if optimize or (read_signature == signature):
3244 lr.bind_callables(pinfo.pdict)
3245 parser = LRParser(lr, pinfo.error_func)
3246 parse = parser.parse
3248 except Exception as e:
3249 errorlog.warning('There was a problem loading the table file: %r', e)
3250 except VersionError as e:
3251 errorlog.warning(str(e))
3255 if debuglog is None:
3258 debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3259 except IOError as e:
3260 errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3261 debuglog = NullLogger()
3263 debuglog = NullLogger()
3265 debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
3269 # Validate the parser information
3270 if pinfo.validate_all():
3271 raise YaccError('Unable to build parser')
3273 if not pinfo.error_func:
3274 errorlog.warning('no p_error() function is defined')
3276 # Create a grammar object
3277 grammar = Grammar(pinfo.tokens)
3279 # Set precedence level for terminals
3280 for term, assoc, level in pinfo.preclist:
3282 grammar.set_precedence(term, assoc, level)
3283 except GrammarError as e:
3284 errorlog.warning('%s', e)
3286 # Add productions to the grammar
3287 for funcname, gram in pinfo.grammar:
3288 file, line, prodname, syms = gram
3290 grammar.add_production(prodname, syms, funcname, file, line)
3291 except GrammarError as e:
3292 errorlog.error('%s', e)
3295 # Set the grammar start symbol
3298 grammar.set_start(pinfo.start)
3300 grammar.set_start(start)
3301 except GrammarError as e:
3302 errorlog.error(str(e))
3306 raise YaccError('Unable to build parser')
3308 # Verify the grammar structure
3309 undefined_symbols = grammar.undefined_symbols()
3310 for sym, prod in undefined_symbols:
3311 errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3314 unused_terminals = grammar.unused_terminals()
3315 if unused_terminals:
3317 debuglog.info('Unused terminals:')
3319 for term in unused_terminals:
3320 errorlog.warning('Token %r defined, but not used', term)
3321 debuglog.info(' %s', term)
3323 # Print out all productions to the debug log
3326 debuglog.info('Grammar')
3328 for n, p in enumerate(grammar.Productions):
3329 debuglog.info('Rule %-5d %s', n, p)
3331 # Find unused non-terminals
3332 unused_rules = grammar.unused_rules()
3333 for prod in unused_rules:
3334 errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3336 if len(unused_terminals) == 1:
3337 errorlog.warning('There is 1 unused token')
3338 if len(unused_terminals) > 1:
3339 errorlog.warning('There are %d unused tokens', len(unused_terminals))
3341 if len(unused_rules) == 1:
3342 errorlog.warning('There is 1 unused rule')
3343 if len(unused_rules) > 1:
3344 errorlog.warning('There are %d unused rules', len(unused_rules))
3348 debuglog.info('Terminals, with rules where they appear')
3350 terms = list(grammar.Terminals)
3353 debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3356 debuglog.info('Nonterminals, with rules where they appear')
3358 nonterms = list(grammar.Nonterminals)
3360 for nonterm in nonterms:
3361 debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3365 unreachable = grammar.find_unreachable()
3366 for u in unreachable:
3367 errorlog.warning('Symbol %r is unreachable', u)
3369 infinite = grammar.infinite_cycles()
3370 for inf in infinite:
3371 errorlog.error('Infinite recursion detected for symbol %r', inf)
3374 unused_prec = grammar.unused_precedence()
3375 for term, assoc in unused_prec:
3376 errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3380 raise YaccError('Unable to build parser')
3382 # Run the LRGeneratedTable on the grammar
3384 errorlog.debug('Generating %s tables', method)
3386 lr = LRGeneratedTable(grammar, method, debuglog)
3389 num_sr = len(lr.sr_conflicts)
3391 # Report shift/reduce and reduce/reduce conflicts
3393 errorlog.warning('1 shift/reduce conflict')
3395 errorlog.warning('%d shift/reduce conflicts', num_sr)
3397 num_rr = len(lr.rr_conflicts)
3399 errorlog.warning('1 reduce/reduce conflict')
3401 errorlog.warning('%d reduce/reduce conflicts', num_rr)
3403 # Write out conflicts to the output file
3404 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3405 debuglog.warning('')
3406 debuglog.warning('Conflicts:')
3407 debuglog.warning('')
3409 for state, tok, resolution in lr.sr_conflicts:
3410 debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
3412 already_reported = set()
3413 for state, rule, rejected in lr.rr_conflicts:
3414 if (state, id(rule), id(rejected)) in already_reported:
3416 debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3417 debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3418 errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3419 errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3420 already_reported.add((state, id(rule), id(rejected)))
3423 for state, rule, rejected in lr.rr_conflicts:
3424 if not rejected.reduced and (rejected not in warned_never):
3425 debuglog.warning('Rule (%s) is never reduced', rejected)
3426 errorlog.warning('Rule (%s) is never reduced', rejected)
3427 warned_never.append(rejected)
3429 # Write the table file if requested
3432 lr.write_table(tabmodule, outputdir, signature)
3433 except IOError as e:
3434 errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3436 # Write a pickled version of the tables
3439 lr.pickle_table(picklefile, signature)
3440 except IOError as e:
3441 errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3444 lr.bind_callables(pinfo.pdict)
3445 parser = LRParser(lr, pinfo.error_func)
3447 parse = parser.parse
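# Typical usage from a grammar module (illustrative):
#
#     import ply.yacc as yacc
#     parser = yacc.yacc()          # build tables from this module's
#                                   # tokens, precedence and p_* rules
#     result = parser.parse(data, lexer=mylexer)
#
# where data is the input string and mylexer is a token source such as a
# lexer built with ply.lex.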