2 ** Copyright (c) 1991, 1994, 1997, 1998 D. Richard Hipp
4 ** This file contains all sources (including headers) to the LEMON
5 ** LALR(1) parser generator. The sources have been combined into a
6 ** single file to make it easy to include LEMON as part of another
9 ** This program is free software; you can redistribute it and/or
10 ** modify it under the terms of the GNU General Public
11 ** License as published by the Free Software Foundation; either
12 ** version 2 of the License, or (at your option) any later version.
14 ** This program is distributed in the hope that it will be useful,
15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ** General Public License for more details.
19 ** You should have received a copy of the GNU General Public
20 ** License along with this library; if not, write to the
21 ** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 ** Boston, MA 02111-1307, USA.
24 ** Author contact information:
26 ** http://www.hwaci.com/drh/
28 ** $Id: lemon.c,v 1.14 2002/08/28 21:04:11 jmayer Exp $
37 * Wrapper around "isupper()", "islower()", etc. to cast the argument to
38 * "unsigned char", so that they at least handle non-ASCII 8-bit characters
39 * (and don't provoke a pile of warnings from GCC).
41 #define safe_isupper(c) isupper((unsigned char)(c))
42 #define safe_islower(c) islower((unsigned char)(c))
43 #define safe_isalpha(c) isalpha((unsigned char)(c))
44 #define safe_isalnum(c) isalnum((unsigned char)(c))
45 #define safe_isspace(c) isspace((unsigned char)(c))
47 extern int access(const char *, int);
50 # if defined(_WIN32) || defined(WIN32)
55 /* #define PRIVATE static */
59 #define MAXRHS 5 /* Set low to exercise exception code */
64 char *msort(char *, char **, int (*)(const void *, const void *));
67 /********** From the file "struct.h" *************************************/
69 ** Principal data structures for the LEMON parser generator.
72 typedef enum {BOOL_FALSE=0, BOOL_TRUE} Boolean;
74 /* Symbols (terminals and nonterminals) of the grammar are stored
75 ** in the following: */
77 char *name; /* Name of the symbol */
78 int index; /* Index number for this symbol */
82 } type; /* Symbols are all either TERMINALS or NTs */
83 struct rule *rule; /* Linked list of rules of this (if an NT) */
84 int prec; /* Precedence if defined (-1 otherwise) */
90 } assoc; /* Associativity if predecence is defined */
91 char *firstset; /* First-set for all rules of this symbol */
92 Boolean lambda; /* True if NT and can generate an empty string */
93 char *destructor; /* Code which executes whenever this symbol is
94 ** popped from the stack during error processing */
95 int destructorln; /* Line number of destructor code */
96 char *datatype; /* The data type of information held by this
97 ** object. Only used if type==NONTERMINAL */
98 int dtnum; /* The data type number. In the parser, the value
99 ** stack is a union. The .yy%d element of this
100 ** union is the correct data type for this object */
103 /* Each production rule in the grammar is stored in the following
106 struct symbol *lhs; /* Left-hand side of the rule */
107 char *lhsalias; /* Alias for the LHS (NULL if none) */
108 int ruleline; /* Line number for the rule */
109 int nrhs; /* Number of RHS symbols */
110 struct symbol **rhs; /* The RHS symbols */
111 char **rhsalias; /* An alias for each RHS symbol (NULL if none) */
112 int line; /* Line number at which code begins */
113 char *code; /* The code executed when this rule is reduced */
114 struct symbol *precsym; /* Precedence symbol for this rule */
115 int index; /* An index number for this rule */
116 Boolean canReduce; /* True if this rule is ever reduced */
117 struct rule *nextlhs; /* Next rule with the same LHS */
118 struct rule *next; /* Next rule in the global list */
121 /* A configuration is a production rule of the grammar together with
122 ** a mark (dot) showing how much of that rule has been processed so far.
123 ** Configurations also contain a follow-set which is a list of terminal
124 ** symbols which are allowed to immediately follow the end of the rule.
125 ** Every configuration is recorded as an instance of the following: */
127 struct rule *rp; /* The rule upon which the configuration is based */
128 int dot; /* The parse point */
129 char *fws; /* Follow-set for this configuration only */
130 struct plink *fplp; /* Follow-set forward propagation links */
131 struct plink *bplp; /* Follow-set backwards propagation links */
132 struct state *stp; /* Pointer to state which contains this */
134 COMPLETE, /* The status is used during followset and */
135 INCOMPLETE /* shift computations */
137 struct config *next; /* Next configuration in the state */
138 struct config *bp; /* The next basis configuration */
141 /* Every shift or reduce operation is stored as one of the following */
143 struct symbol *sp; /* The look-ahead symbol */
149 CONFLICT, /* Was a reduce, but part of a conflict */
150 SH_RESOLVED, /* Was a shift. Precedence resolved conflict */
151 RD_RESOLVED, /* Was reduce. Precedence resolved conflict */
152 NOT_USED /* Deleted by compression */
155 struct state *stp; /* The new state, if a shift */
156 struct rule *rp; /* The rule, if a reduce */
158 struct action *next; /* Next action for this state */
159 struct action *collide; /* Next action with the same hash */
162 /* Each state of the generated parser's finite state machine
163 ** is encoded as an instance of the following structure. */
165 struct config *bp; /* The basis configurations for this state */
166 struct config *cfp; /* All configurations in this set */
167 int index; /* Sequencial number for this state */
168 struct action *ap; /* Array of actions for this state */
169 unsigned int naction; /* Number of actions for this state */
170 int tabstart; /* First index of the action table */
171 int tabdfltact; /* Default action */
174 /* A followset propagation link indicates that the contents of one
175 ** configuration followset should be propagated to another whenever
176 ** the first changes. */
178 struct config *cfp; /* The configuration to which linked */
179 struct plink *next; /* The next propagate link */
182 /* The state vector for the entire parser generator is recorded as
183 ** follows. (LEMON uses no global variables and makes little use of
184 ** static variables. Fields in the following structure can be thought
185 ** of as begin global variables in the program.) */
187 struct state **sorted; /* Table of states sorted by state number */
188 struct rule *rule; /* List of all rules */
189 int nstate; /* Number of states */
190 int nrule; /* Number of rules */
191 int nsymbol; /* Number of terminal and nonterminal symbols */
192 int nterminal; /* Number of terminal symbols */
193 struct symbol **symbols; /* Sorted array of pointers to symbols */
194 int errorcnt; /* Number of errors */
195 struct symbol *errsym; /* The error symbol */
196 char *name; /* Name of the generated parser */
197 char *arg; /* Declaration of the 3th argument to parser */
198 char *tokentype; /* Type of terminal symbols in the parser stack */
199 char *start; /* Name of the start symbol for the grammar */
200 char *stacksize; /* Size of the parser stack */
201 char *include; /* Code to put at the start of the C file */
202 int includeln; /* Line number for start of include code */
203 char *error; /* Code to execute when an error is seen */
204 int errorln; /* Line number for start of error code */
205 char *overflow; /* Code to execute on a stack overflow */
206 int overflowln; /* Line number for start of overflow code */
207 char *failure; /* Code to execute on parser failure */
208 int failureln; /* Line number for start of failure code */
209 char *accept; /* Code to execute when the parser excepts */
210 int acceptln; /* Line number for the start of accept code */
211 char *extracode; /* Code appended to the generated file */
212 int extracodeln; /* Line number for the start of the extra code */
213 char *tokendest; /* Code to execute to destroy token data */
214 int tokendestln; /* Line number for token destroyer code */
215 char *filename; /* Name of the input file */
216 char *basename; /* Basename of inputer file (no directory or path */
217 char *outname; /* Name of the current output file */
218 char *outdirname; /* Name of the output directory, specified by user */
219 char *templatename; /* Name of template file to use, specified by user */
220 char *tokenprefix; /* A prefix added to token names in the .h file */
221 int nconflict; /* Number of parsing conflicts */
222 int tablesize; /* Size of the parse tables */
223 int basisflag; /* Print only basis configurations */
224 char *argv0; /* Name of the program */
227 #define MemoryCheck(X) if((X)==0){ \
228 extern void memory_error(void); \
232 /******** From the file "action.h" *************************************/
233 struct action *Action_new(void);
234 struct action *Action_sort(struct action *);
235 void Action_add(struct action **, enum e_action, struct symbol *, void *);
237 /********* From the file "assert.h" ************************************/
238 void myassert(char *, int);
240 # define assert(X) if(!(X))myassert(__FILE__,__LINE__)
245 /********** From the file "build.h" ************************************/
246 void FindRulePrecedences(struct lemon *);
247 void FindFirstSets(struct lemon *);
248 void FindStates(struct lemon *);
249 void FindLinks(struct lemon *);
250 void FindFollowSets(struct lemon *);
251 void FindActions(struct lemon *);
253 /********* From the file "configlist.h" *********************************/
254 void Configlist_init(void);
255 struct config *Configlist_add(struct rule *, int);
256 struct config *Configlist_addbasis(struct rule *, int);
257 void Configlist_closure(struct lemon *);
258 void Configlist_sort(void);
259 void Configlist_sortbasis(void);
260 struct config *Configlist_return(void);
261 struct config *Configlist_basis(void);
262 void Configlist_eat(struct config *);
263 void Configlist_reset(void);
265 /********* From the file "error.h" ***************************************/
267 void ErrorMsg( char *, int, char *, ... )
268 __attribute__((format (printf, 3, 4)));
270 void ErrorMsg( char *, int, char *, ... );
273 /****** From the file "option.h" ******************************************/
275 enum { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR,
276 OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type;
281 int optinit(char**,struct s_options*,FILE*);
283 char *get_optarg(int);
284 void get_opterr(int);
287 /******** From the file "parse.h" *****************************************/
288 void Parse(struct lemon *lemp);
290 /********* From the file "plink.h" ***************************************/
291 struct plink *Plink_new(void);
292 void Plink_add(struct plink **, struct config *);
293 void Plink_copy(struct plink **, struct plink *);
294 void Plink_delete(struct plink *);
296 /********** From the file "report.h" *************************************/
297 void Reprint(struct lemon *);
298 void ReportOutput(struct lemon *);
299 void ReportTable(struct lemon *, int);
300 void ReportHeader(struct lemon *);
301 void CompressTables(struct lemon *);
303 /********** From the file "set.h" ****************************************/
304 void SetSize(int N); /* All sets will be of size N */
305 char *SetNew(void); /* A new set for element 0..N */
306 void SetFree(char*); /* Deallocate a set */
308 int SetAdd(char*,int); /* Add element to a set */
309 int SetUnion(char *A,char *B); /* A <- A U B, thru element N */
311 #define SetFind(X,Y) (X[Y]) /* True if Y is in set X */
313 /**************** From the file "table.h" *********************************/
315 ** All code in this file has been automatically generated
316 ** from a specification in the file
318 ** by the associative array code building program "aagen".
319 ** Do not edit this file! Instead, edit the specification
320 ** file, then rerun aagen.
323 ** Code for processing tables in the LEMON parser generator.
326 /* Routines for handling a strings */
328 char *Strsafe(char *);
330 void Strsafe_init(void);
331 int Strsafe_insert(char *);
332 char *Strsafe_find(char *);
334 /* Routines for handling symbols of the grammar */
336 struct symbol *Symbol_new(char *x);
337 int Symbolcmpp(const void *, const void *);
338 void Symbol_init(void);
339 int Symbol_insert(struct symbol *, char *);
340 struct symbol *Symbol_find(char *);
341 struct symbol *Symbol_Nth(int);
342 int Symbol_count(void);
343 struct symbol **Symbol_arrayof(void);
345 /* Routines to manage the state table */
347 int Configcmp(const void *, const void *);
348 struct state *State_new(void);
349 void State_init(void);
350 int State_insert(struct state *, struct config *);
351 struct state *State_find(struct config *);
352 struct state **State_arrayof(void);
354 /* Routines used for efficiency in Configlist_add */
356 void Configtable_init(void);
357 int Configtable_insert(struct config *);
358 struct config *Configtable_find(struct config *);
359 void Configtable_clear(int(*)(struct config *));
360 /****************** From the file "action.c" *******************************/
362 ** Routines processing parser actions in the LEMON parser generator.
365 /* Allocate a new parser action */
366 struct action *Action_new(void){
367 static struct action *freelist = 0;
373 freelist = (struct action *)malloc( sizeof(struct action)*amt );
375 fprintf(stderr,"Unable to allocate memory for a new parser action.");
378 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
379 freelist[amt-1].next = 0;
382 freelist = freelist->next;
386 /* Compare two actions */
387 static int actioncmp(const void *ap1_arg, const void *ap2_arg)
389 const struct action *ap1 = ap1_arg, *ap2 = ap2_arg;
391 rc = ap1->sp->index - ap2->sp->index;
392 if( rc==0 ) rc = (int)ap1->type - (int)ap2->type;
394 assert( ap1->type==REDUCE && ap2->type==REDUCE );
395 rc = ap1->x.rp->index - ap2->x.rp->index;
400 /* Sort parser actions */
401 struct action *Action_sort(struct action *ap)
403 ap = (struct action *)msort((char *)ap,(char **)&ap->next,actioncmp);
407 void Action_add(struct action **app, enum e_action type, struct symbol *sp,
417 new->x.stp = (struct state *)arg;
419 new->x.rp = (struct rule *)arg;
422 /********************** From the file "assert.c" ****************************/
424 ** A more efficient way of handling assertions.
426 void myassert(char *file, int line)
428 fprintf(stderr,"Assertion failed on line %d of file \"%s\"\n",line,file);
431 /********************** From the file "build.c" *****************************/
433 ** Routines to construction the finite state machine for the LEMON
437 /* Find a precedence symbol of every rule in the grammar.
439 ** Those rules which have a precedence symbol coded in the input
440 ** grammar using the "[symbol]" construct will already have the
441 ** rp->precsym field filled. Other rules take as their precedence
442 ** symbol the first RHS symbol with a defined precedence. If there
443 ** are not RHS symbols with a defined precedence, the precedence
444 ** symbol field is left blank.
446 void FindRulePrecedences(struct lemon *xp)
449 for(rp=xp->rule; rp; rp=rp->next){
450 if( rp->precsym==0 ){
452 for(i=0; i<rp->nrhs; i++){
453 if( rp->rhs[i]->prec>=0 ){
454 rp->precsym = rp->rhs[i];
463 /* Find all nonterminals which will generate the empty string.
464 ** Then go back and compute the first sets of every nonterminal.
465 ** The first set is the set of all terminal symbols which can begin
466 ** a string generated by that nonterminal.
468 void FindFirstSets(struct lemon *lemp)
474 for(i=0; i<lemp->nsymbol; i++){
475 lemp->symbols[i]->lambda = BOOL_FALSE;
477 for(i=lemp->nterminal; i<lemp->nsymbol; i++){
478 lemp->symbols[i]->firstset = SetNew();
481 /* First compute all lambdas */
484 for(rp=lemp->rule; rp; rp=rp->next){
485 if( rp->lhs->lambda ) continue;
486 for(i=0; i<rp->nrhs; i++){
487 if( rp->rhs[i]->lambda==BOOL_FALSE ) break;
490 rp->lhs->lambda = BOOL_TRUE;
496 /* Now compute all first sets */
498 struct symbol *s1, *s2;
500 for(rp=lemp->rule; rp; rp=rp->next){
502 for(i=0; i<rp->nrhs; i++){
504 if( s2->type==TERMINAL ){
505 progress += SetAdd(s1->firstset,s2->index);
508 if( s1->lambda==BOOL_FALSE ) break;
510 progress += SetUnion(s1->firstset,s2->firstset);
511 if( s2->lambda==BOOL_FALSE ) break;
519 /* Compute all LR(0) states for the grammar. Links
520 ** are added to between some states so that the LR(1) follow sets
521 ** can be computed later.
523 PRIVATE struct state *getstate(struct lemon *); /* forward reference */
524 void FindStates(lemp)
532 /* Find the start symbol */
534 sp = Symbol_find(lemp->start);
536 ErrorMsg(lemp->filename,0,
537 "The specified start symbol \"%s\" is not \
538 in a nonterminal of the grammar. \"%s\" will be used as the start \
539 symbol instead.",lemp->start,lemp->rule->lhs->name);
541 sp = lemp->rule->lhs;
544 sp = lemp->rule->lhs;
547 /* Make sure the start symbol doesn't occur on the right-hand side of
548 ** any rule. Report an error if it does. (YACC would generate a new
549 ** start symbol in this case.) */
550 for(rp=lemp->rule; rp; rp=rp->next){
552 for(i=0; i<rp->nrhs; i++){
553 if( rp->rhs[i]==sp ){
554 ErrorMsg(lemp->filename,0,
555 "The start symbol \"%s\" occurs on the \
556 right-hand side of a rule. This will result in a parser which \
557 does not work properly.",sp->name);
563 /* The basis configuration set for the first state
564 ** is all rules which have the start symbol as their
566 for(rp=sp->rule; rp; rp=rp->nextlhs){
567 struct config *newcfp;
568 newcfp = Configlist_addbasis(rp,0);
569 SetAdd(newcfp->fws,0);
572 /* Compute the first state. All other states will be
573 ** computed automatically during the computation of the first one.
574 ** The returned pointer to the first state is not used. */
575 (void)getstate(lemp);
579 /* Return a pointer to a state which is described by the configuration
580 ** list which has been built from calls to Configlist_add.
582 PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */
583 PRIVATE struct state *getstate(struct lemon *lemp)
585 struct config *cfp, *bp;
588 /* Extract the sorted basis of the new state. The basis was constructed
589 ** by prior calls to "Configlist_addbasis()". */
590 Configlist_sortbasis();
591 bp = Configlist_basis();
593 /* Get a state with the same basis */
594 stp = State_find(bp);
596 /* A state with the same basis already exists! Copy all the follow-set
597 ** propagation links from the state under construction into the
598 ** preexisting state, then return a pointer to the preexisting state */
599 struct config *x, *y;
600 for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){
601 Plink_copy(&y->bplp,x->bplp);
602 Plink_delete(x->fplp);
603 x->fplp = x->bplp = 0;
605 cfp = Configlist_return();
608 /* This really is a new state. Construct all the details */
609 Configlist_closure(lemp); /* Compute the configuration closure */
610 Configlist_sort(); /* Sort the configuration closure */
611 cfp = Configlist_return(); /* Get a pointer to the config list */
612 stp = State_new(); /* A new state structure */
614 stp->bp = bp; /* Remember the configuration basis */
615 stp->cfp = cfp; /* Remember the configuration closure */
616 stp->index = lemp->nstate++; /* Every state gets a sequence number */
617 stp->ap = 0; /* No actions, yet. */
618 State_insert(stp,stp->bp); /* Add to the state table */
619 buildshifts(lemp,stp); /* Recursively compute successor states */
624 /* Construct all successor states to the given state. A "successor"
625 ** state is any state which can be reached by a shift action.
627 PRIVATE void buildshifts(
629 struct state *stp) /* The state from which successors are computed */
631 struct config *cfp; /* For looping thru the config closure of "stp" */
632 struct config *bcfp; /* For the inner loop on config closure of "stp" */
633 struct config *new; /* */
634 struct symbol *sp; /* Symbol following the dot in configuration "cfp" */
635 struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */
636 struct state *newstp; /* A pointer to a successor state */
638 /* Each configuration becomes complete after it contibutes to a successor
639 ** state. Initially, all configurations are incomplete */
640 for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
642 /* Loop through all configurations of the state "stp" */
643 for(cfp=stp->cfp; cfp; cfp=cfp->next){
644 if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */
645 if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */
646 Configlist_reset(); /* Reset the new config set */
647 sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */
649 /* For every configuration in the state "stp" which has the symbol "sp"
650 ** following its dot, add the same configuration to the basis set under
651 ** construction but with the dot shifted one symbol to the right. */
652 for(bcfp=cfp; bcfp; bcfp=bcfp->next){
653 if( bcfp->status==COMPLETE ) continue; /* Already used */
654 if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
655 bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */
656 if( bsp!=sp ) continue; /* Must be same as for "cfp" */
657 bcfp->status = COMPLETE; /* Mark this config as used */
658 new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
659 Plink_add(&new->bplp,bcfp);
662 /* Get a pointer to the state described by the basis configuration set
663 ** constructed in the preceding loop */
664 newstp = getstate(lemp);
666 /* The state "newstp" is reached from the state "stp" by a shift action
667 ** on the symbol "sp" */
668 Action_add(&stp->ap,SHIFT,sp,newstp);
673 ** Construct the propagation links
675 void FindLinks(struct lemon *lemp)
678 struct config *cfp, *other;
682 /* Housekeeping detail:
683 ** Add to every propagate link a pointer back to the state to
684 ** which the link is attached. */
685 for(i=0; i<lemp->nstate; i++){
686 stp = lemp->sorted[i];
687 for(cfp=stp->cfp; cfp; cfp=cfp->next){
692 /* Convert all backlinks into forward links. Only the forward
693 ** links are used in the follow-set computation. */
694 for(i=0; i<lemp->nstate; i++){
695 stp = lemp->sorted[i];
696 for(cfp=stp->cfp; cfp; cfp=cfp->next){
697 for(plp=cfp->bplp; plp; plp=plp->next){
699 Plink_add(&other->fplp,cfp);
705 /* Compute all followsets.
707 ** A followset is the set of all symbols which can come immediately
708 ** after a configuration.
710 void FindFollowSets(struct lemon *lemp)
718 for(i=0; i<lemp->nstate; i++){
719 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
720 cfp->status = INCOMPLETE;
726 for(i=0; i<lemp->nstate; i++){
727 for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){
728 if( cfp->status==COMPLETE ) continue;
729 for(plp=cfp->fplp; plp; plp=plp->next){
730 change = SetUnion(plp->cfp->fws,cfp->fws);
732 plp->cfp->status = INCOMPLETE;
736 cfp->status = COMPLETE;
742 static int resolve_conflict(struct action *, struct action *);
744 /* Compute the reduce actions, and resolve conflicts.
746 void FindActions(struct lemon *lemp)
754 /* Add all of the reduce actions
755 ** A reduce action is added for each element of the followset of
756 ** a configuration which has its dot at the extreme right.
758 for(i=0; i<lemp->nstate; i++){ /* Loop over all states */
759 stp = lemp->sorted[i];
760 for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */
761 if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */
762 for(j=0; j<lemp->nterminal; j++){
763 if( SetFind(cfp->fws,j) ){
764 /* Add a reduce action to the state "stp" which will reduce by the
765 ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */
766 Action_add(&stp->ap,REDUCE,lemp->symbols[j],cfp->rp);
773 /* Add the accepting token */
775 sp = Symbol_find(lemp->start);
776 if( sp==0 ) sp = lemp->rule->lhs;
778 sp = lemp->rule->lhs;
780 /* Add to the first state (which is always the starting state of the
781 ** finite state machine) an action to ACCEPT if the lookahead is the
782 ** start nonterminal. */
783 Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0);
785 /* Resolve conflicts */
786 for(i=0; i<lemp->nstate; i++){
787 struct action *ap, *nap;
789 stp = lemp->sorted[i];
791 stp->ap = Action_sort(stp->ap);
792 for(ap=stp->ap; ap && ap->next; ap=nap){
793 for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){
794 /* The two actions "ap" and "nap" have the same lookahead.
795 ** Figure out which one should be used */
796 lemp->nconflict += resolve_conflict(ap,nap);
801 /* Report an error for each rule that can never be reduced. */
802 for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = BOOL_FALSE;
803 for(i=0; i<lemp->nstate; i++){
805 for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){
806 if( ap->type==REDUCE ) ap->x.rp->canReduce = BOOL_TRUE;
809 for(rp=lemp->rule; rp; rp=rp->next){
810 if( rp->canReduce ) continue;
811 ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n");
816 /* Resolve a conflict between the two given actions. If the
817 ** conflict can't be resolve, return non-zero.
820 ** To resolve a conflict, first look to see if either action
821 ** is on an error rule. In that case, take the action which
822 ** is not associated with the error rule. If neither or both
823 ** actions are associated with an error rule, then try to
824 ** use precedence to resolve the conflict.
826 ** If either action is a SHIFT, then it must be apx. This
827 ** function won't work if apx->type==REDUCE and apy->type==SHIFT.
829 static int resolve_conflict(
833 struct symbol *spx, *spy;
835 assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */
836 if( apx->type==SHIFT && apy->type==REDUCE ){
838 spy = apy->x.rp->precsym;
839 if( spy==0 || spx->prec<0 || spy->prec<0 ){
840 /* Not enough precedence information. */
841 apy->type = CONFLICT;
843 }else if( spx->prec>spy->prec ){ /* Lower precedence wins */
844 apy->type = RD_RESOLVED;
845 }else if( spx->prec<spy->prec ){
846 apx->type = SH_RESOLVED;
847 }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */
848 apy->type = RD_RESOLVED; /* associativity */
849 }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */
850 apx->type = SH_RESOLVED;
852 assert( spx->prec==spy->prec && spx->assoc==NONE );
853 apy->type = CONFLICT;
856 }else if( apx->type==REDUCE && apy->type==REDUCE ){
857 spx = apx->x.rp->precsym;
858 spy = apy->x.rp->precsym;
859 if( spx==0 || spy==0 || spx->prec<0 ||
860 spy->prec<0 || spx->prec==spy->prec ){
861 apy->type = CONFLICT;
863 }else if( spx->prec>spy->prec ){
864 apy->type = RD_RESOLVED;
865 }else if( spx->prec<spy->prec ){
866 apx->type = RD_RESOLVED;
869 /* Can't happen. Shifts have to come before Reduces on the
870 ** list because the reduces were added last. Hence, if apx->type==REDUCE
871 ** then it is impossible for apy->type==SHIFT */
875 /********************* From the file "configlist.c" *************************/
877 ** Routines to processing a configuration list and building a state
878 ** in the LEMON parser generator.
881 static struct config *freelist = 0; /* List of free configurations */
882 static struct config *current = 0; /* Top of list of configurations */
883 static struct config **currentend = 0; /* Last on list of configs */
884 static struct config *basis = 0; /* Top of list of basis configs */
885 static struct config **basisend = 0; /* End of list of basis configs */
887 /* Return a pointer to a new configuration */
888 PRIVATE struct config *newconfig(void){
893 freelist = (struct config *)malloc( sizeof(struct config)*amt );
895 fprintf(stderr,"Unable to allocate memory for a new configuration.");
898 for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
899 freelist[amt-1].next = 0;
902 freelist = freelist->next;
906 /* The configuration "old" is no longer used */
907 PRIVATE void deleteconfig(struct config *old)
909 old->next = freelist;
913 /* Initialized the configuration list builder */
914 void Configlist_init(void){
916 currentend = ¤t;
923 /* Initialized the configuration list builder */
924 void Configlist_reset(void){
926 currentend = ¤t;
929 Configtable_clear(0);
933 /* Add another configuration to the configuration list */
934 struct config *Configlist_add(
935 struct rule *rp, /* The rule */
936 int dot) /* Index into the RHS of the rule where the dot goes */
938 struct config *cfp, model;
940 assert( currentend!=0 );
943 cfp = Configtable_find(&model);
950 cfp->fplp = cfp->bplp = 0;
954 currentend = &cfp->next;
955 Configtable_insert(cfp);
960 /* Add a basis configuration to the configuration list */
961 struct config *Configlist_addbasis(struct rule *rp, int dot)
963 struct config *cfp, model;
965 assert( basisend!=0 );
966 assert( currentend!=0 );
969 cfp = Configtable_find(&model);
976 cfp->fplp = cfp->bplp = 0;
980 currentend = &cfp->next;
983 Configtable_insert(cfp);
988 /* Compute the closure of the configuration list */
989 void Configlist_closure(struct lemon *lemp)
991 struct config *cfp, *newcfp;
992 struct rule *rp, *newrp;
993 struct symbol *sp, *xsp;
996 assert( currentend!=0 );
997 for(cfp=current; cfp; cfp=cfp->next){
1000 if( dot>=rp->nrhs ) continue;
1002 if( sp->type==NONTERMINAL ){
1003 if( sp->rule==0 && sp!=lemp->errsym ){
1004 ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.",
1008 for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){
1009 newcfp = Configlist_add(newrp,0);
1010 for(i=dot+1; i<rp->nrhs; i++){
1012 if( xsp->type==TERMINAL ){
1013 SetAdd(newcfp->fws,xsp->index);
1016 SetUnion(newcfp->fws,xsp->firstset);
1017 if( xsp->lambda==BOOL_FALSE ) break;
1020 if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp);
1027 /* Sort the configuration list */
1028 void Configlist_sort(void){
1029 current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp);
1034 /* Sort the basis configuration list */
1035 void Configlist_sortbasis(void){
1036 basis = (struct config *)msort((char *)current,(char **)&(current->bp),Configcmp);
1041 /* Return a pointer to the head of the configuration list and
1042 ** reset the list */
1043 struct config *Configlist_return(void){
1051 /* Return a pointer to the head of the configuration list and
1052 ** reset the list */
1053 struct config *Configlist_basis(void){
1061 /* Free all elements of the given configuration list */
1062 void Configlist_eat(struct config *cfp)
1064 struct config *nextcfp;
1065 for(; cfp; cfp=nextcfp){
1066 nextcfp = cfp->next;
1067 assert( cfp->fplp==0 );
1068 assert( cfp->bplp==0 );
1069 if( cfp->fws ) SetFree(cfp->fws);
1074 /***************** From the file "error.c" *********************************/
1076 ** Code for printing error message.
1079 /* Find a good place to break "msg" so that its length is at least "min"
1080 ** but no more than "max". Make the point as close to max as possible.
1082 static int findbreak(char *msg, int min, int max)
1086 for(i=spot=min; i<=max; i++){
1088 if( c=='\t' ) msg[i] = ' ';
1089 if( c=='\n' ){ msg[i] = ' '; spot = i; break; }
1090 if( c==0 ){ spot = i; break; }
1091 if( c=='-' && i<max-1 ) spot = i+1;
1092 if( c==' ' ) spot = i;
1098 ** The error message is split across multiple lines if necessary. The
1099 ** splits occur at a space, if there is a space available near the end
1102 #define ERRMSGSIZE 10000 /* Hope this is big enough. No way to error check */
1103 #define LINEWIDTH 79 /* Max width of any output line */
1104 #define PREFIXLIMIT 30 /* Max width of the prefix on each line */
1105 void ErrorMsg(char *filename, int lineno, char *format, ...)
1107 char errmsg[ERRMSGSIZE];
1108 char prefix[PREFIXLIMIT+10];
1113 int end, restart, base;
1115 va_start(ap, format);
1116 /* Prepare a prefix to be prepended to every output line */
1118 sprintf(prefix,"%.*s:%d: ",PREFIXLIMIT-10,filename,lineno);
1120 sprintf(prefix,"%.*s: ",PREFIXLIMIT-10,filename);
1122 prefixsize = strlen(prefix);
1123 availablewidth = LINEWIDTH - prefixsize;
1125 /* Generate the error message */
1126 vsprintf(errmsg,format,ap);
1128 errmsgsize = strlen(errmsg);
1129 /* Remove trailing '\n's from the error message. */
1130 while( errmsgsize>0 && errmsg[errmsgsize-1]=='\n' ){
1131 errmsg[--errmsgsize] = 0;
1134 /* Print the error message */
1136 while( errmsg[base]!=0 ){
1137 end = restart = findbreak(&errmsg[base],0,availablewidth);
1139 while( errmsg[restart]==' ' ) restart++;
1140 fprintf(stdout,"%s%.*s\n",prefix,end,&errmsg[base]);
1144 /**************** From the file "main.c" ************************************/
1146 ** Main program file for the LEMON parser generator.
1149 /* Report an out-of-memory condition and abort. This function
1150 ** is used mostly by the "MemoryCheck" macro in struct.h
1152 void memory_error(void){
1153 fprintf(stderr,"Out of memory. Aborting...\n");
1157 /* Locates the basename in a string possibly containing paths,
1158 * including forward-slash and backward-slash delimiters on Windows,
1159 * and allocates a new string containing just the basename.
1160 * Returns the pointer to that string.
1163 make_basename(char* fullname)
1168 /* Find the last forward slash */
1169 cp = strrchr(fullname, '/');
1172 /* On Windows, if no forward slash was found, look ofr
1175 cp = strrchr(fullname, '\\');
1179 new_string = malloc( strlen(fullname) );
1180 strcpy(new_string, fullname);
1183 /* skip the slash */
1185 new_string = malloc( strlen(cp) );
1186 strcpy(new_string, cp);
1195 /* The main program. Parse the command line and do it... */
1196 int main(int argc _U_, char **argv)
1198 static int version = 0;
1199 static int rpflag = 0;
1200 static int basisflag = 0;
1201 static int compress = 0;
1202 static int quiet = 0;
1203 static int statistics = 0;
1204 static int mhflag = 0;
1205 static char *outdirname = NULL;
1206 static char *templatename = NULL;
1207 static struct s_options options[] = {
1208 {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."},
1209 {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."},
1210 {OPT_STR, "d", (char*)&outdirname, "Output directory name."},
1211 {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."},
1212 {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file"},
1213 {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."},
1214 {OPT_FLAG, "s", (char*)&statistics, "Print parser stats to standard output."},
1215 {OPT_STR, "t", (char*)&templatename, "Template file to use."},
1216 {OPT_FLAG, "x", (char*)&version, "Print the version number."},
1222 optinit(argv,options,stderr);
1224 printf("Lemon version 1.0\n"
1225 "Copyright 1991-1997 by D. Richard Hipp\n"
1226 "Freely distributable under the GNU Public License.\n"
1230 if( optnargs()!=1 ){
1231 fprintf(stderr,"Exactly one filename argument is required.\n");
1236 /* Initialize the machine */
1240 lem.argv0 = argv[0];
1241 lem.filename = get_optarg(0);
1242 lem.basisflag = basisflag;
1244 lem.name = lem.include = lem.arg = lem.tokentype = lem.start = 0;
1246 lem.error = lem.overflow = lem.failure = lem.accept = lem.tokendest =
1247 lem.tokenprefix = lem.outname = lem.extracode = 0;
1250 lem.errsym = Symbol_new("error");
1251 lem.outdirname = outdirname;
1252 lem.templatename = templatename;
1253 lem.basename = make_basename(lem.filename);
1255 /* Parse the input file */
1257 if( lem.errorcnt ) exit(lem.errorcnt);
1259 fprintf(stderr,"Empty grammar.\n");
1263 /* Count and index the symbols of the grammar */
1264 lem.nsymbol = Symbol_count();
1265 Symbol_new("{default}");
1266 lem.symbols = Symbol_arrayof();
1267 qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*),Symbolcmpp);
1268 for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
1269 for(i=1; safe_isupper(lem.symbols[i]->name[0]); i++);
1272 /* Generate a reprint of the grammar, if requested on the command line */
1276 /* Initialize the size for all follow and first sets */
1277 SetSize(lem.nterminal);
1279 /* Find the precedence for every production rule (that has one) */
1280 FindRulePrecedences(&lem);
1282 /* Compute the lambda-nonterminals and the first-sets for every
1284 FindFirstSets(&lem);
1286 /* Compute all LR(0) states. Also record follow-set propagation
1287 ** links so that the follow-set can be computed later */
1290 lem.sorted = State_arrayof();
1292 /* Tie up loose ends on the propagation links */
1295 /* Compute the follow set of every reducible configuration */
1296 FindFollowSets(&lem);
1298 /* Compute the action tables */
1301 /* Compress the action tables */
1302 if( compress==0 ) CompressTables(&lem);
1304 /* Generate a report of the parser generated. (the "y.output" file) */
1305 if( !quiet ) ReportOutput(&lem);
1307 /* Generate the source code for the parser */
1308 ReportTable(&lem, mhflag);
1310 /* Produce a header file for use by the scanner. (This step is
1311 ** omitted if the "-m" option is used because makeheaders will
1312 ** generate the file for us.) */
1313 if( !mhflag ) ReportHeader(&lem);
1316 printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n",
1317 lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule);
1318 printf(" %d states, %d parser table entries, %d conflicts\n",
1319 lem.nstate, lem.tablesize, lem.nconflict);
1321 if( lem.nconflict ){
1322 fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict);
1324 exit(lem.errorcnt + lem.nconflict);
1326 /******************** From the file "msort.c" *******************************/
1328 ** A generic merge-sort program.
1331 ** Let "ptr" be a pointer to some structure which is at the head of
1332 ** a null-terminated list. Then to sort the list call:
1334 ** ptr = msort(ptr,&(ptr->next),cmpfnc);
1336 ** In the above, "cmpfnc" is a pointer to a function which compares
1337 ** two instances of the structure and returns an integer, as in
1338 ** strcmp. The second argument is a pointer to the pointer to the
1339 ** second element of the linked list. This address is used to compute
1340 ** the offset to the "next" field within the structure. The offset to
1341 ** the "next" field must be constant for all structures in the list.
1343 ** The function returns a new pointer which is the head of the list
1351 ** Return a pointer to the next structure in the linked list.
1353 #define NEXT(A) (*(char**)(((char *)A)+offset))
1357 ** a: A sorted, null-terminated linked list. (May be null).
1358 ** b: A sorted, null-terminated linked list. (May be null).
1359 ** cmp: A pointer to the comparison function.
1360 ** offset: Offset in the structure to the "next" field.
1363 ** A pointer to the head of a sorted list containing the elements
1367 ** The "next" pointers for elements in the lists a and b are
1370 static char *merge(char *a, char *b, int (*cmp)(const void *, const void *),
1380 if( (*cmp)(a,b)<0 ){
1389 if( (*cmp)(a,b)<0 ){
1399 if( a ) NEXT(ptr) = a;
1407 ** list: Pointer to a singly-linked list of structures.
1408 ** next: Pointer to pointer to the second element of the list.
1409 ** cmp: A comparison function.
1412 ** A pointer to the head of a sorted list containing the elements
1413 ** orginally in list.
1416 ** The "next" pointers for elements in list are changed.
1419 char *msort(char *list, char **next, int (*cmp)(const void *, const void *))
1423 char *set[LISTSIZE];
1425 offset = (char *)next - (char *)list;
1426 for(i=0; i<LISTSIZE; i++) set[i] = 0;
1431 for(i=0; i<LISTSIZE-1 && set[i]!=0; i++){
1432 ep = merge(ep,set[i],cmp,offset);
1438 for(i=0; i<LISTSIZE; i++) if( set[i] ) ep = merge(ep,set[i],cmp,offset);
1441 /************************ From the file "option.c" **************************/
1443 static struct s_options *op;
1444 static FILE *errstream;
1446 #define ISOPT(X) ((X)[0]=='-'||(X)[0]=='+'||strchr((X),'=')!=0)
1449 ** Print the command line with a carrot pointing to the k-th character
1450 ** of the n-th field.
1452 static void errline(int n, int k, FILE *err)
1456 if( argv[0] ) fprintf(err,"%s",argv[0]);
1457 spcnt = strlen(argv[0]) + 1;
1458 for(i=1; i<n && argv[i]; i++){
1459 fprintf(err," %s",argv[i]);
1460 spcnt += strlen(argv[i]+1);
1463 for(; argv[i]; i++) fprintf(err," %s",argv[i]);
1465 fprintf(err,"\n%*s^-- here\n",spcnt,"");
1467 fprintf(err,"\n%*shere --^\n",spcnt-7,"");
1472 ** Return the index of the N-th non-switch argument. Return -1
1473 ** if N is out of range.
1475 static int argindex(int n)
1479 if( argv!=0 && *argv!=0 ){
1480 for(i=1; argv[i]; i++){
1481 if( dashdash || !ISOPT(argv[i]) ){
1482 if( n==0 ) return i;
1485 if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1491 static char emsg[] = "Command line syntax error: ";
1494 ** Process a flag command line argument.
1496 static int handleflags(int i, FILE *err)
1501 for(j=0; op[j].label; j++){
1502 if( strcmp(&argv[i][1],op[j].label)==0 ) break;
1504 v = argv[i][0]=='-' ? 1 : 0;
1505 if( op[j].label==0 ){
1507 fprintf(err,"%sundefined option.\n",emsg);
1511 }else if( op[j].type==OPT_FLAG ){
1512 *((int*)op[j].arg) = v;
1513 }else if( op[j].type==OPT_FFLAG ){
1514 (*(void(*)())(op[j].arg))(v);
1517 fprintf(err,"%smissing argument on switch.\n",emsg);
1526 ** Process a command line switch which has an argument.
1528 static int handleswitch(int i, FILE *err)
1536 cp = strchr(argv[i],'=');
1538 for(j=0; op[j].label; j++){
1539 if( strcmp(argv[i],op[j].label)==0 ) break;
1542 if( op[j].label==0 ){
1544 fprintf(err,"%sundefined option.\n",emsg);
1550 switch( op[j].type ){
1554 fprintf(err,"%soption requires an argument.\n",emsg);
1561 dv = strtod(cp,&end);
1564 fprintf(err,"%sillegal character in floating-point argument.\n",emsg);
1565 errline(i,(int)(end-argv[i]),err);
1572 lv = strtol(cp,&end,0);
1575 fprintf(err,"%sillegal character in integer argument.\n",emsg);
1576 errline(i,(int)(end-argv[i]),err);
1586 switch( op[j].type ){
1591 *(double*)(op[j].arg) = dv;
1594 (*(void(*)())(op[j].arg))(dv);
1597 *(int*)(op[j].arg) = lv;
1600 (*(void(*)())(op[j].arg))((int)lv);
1603 *(char**)(op[j].arg) = sv;
1606 (*(void(*)())(op[j].arg))(sv);
1613 int optinit(char **a, struct s_options *o, FILE *err)
1619 if( argv && *argv && op ){
1621 for(i=1; argv[i]; i++){
1622 if( argv[i][0]=='+' || argv[i][0]=='-' ){
1623 errcnt += handleflags(i,err);
1624 }else if( strchr(argv[i],'=') ){
1625 errcnt += handleswitch(i,err);
1630 fprintf(err,"Valid command line options for \"%s\" are:\n",*a);
1641 if( argv!=0 && argv[0]!=0 ){
1642 for(i=1; argv[i]; i++){
1643 if( dashdash || !ISOPT(argv[i]) ) cnt++;
1644 if( strcmp(argv[i],"--")==0 ) dashdash = 1;
1650 char *get_optarg(int n)
1654 return i>=0 ? argv[i] : 0;
1657 void get_opterr(int n)
1661 if( i>=0 ) errline(i,0,errstream);
1664 void optprint(void){
1668 for(i=0; op[i].label; i++){
1669 len = strlen(op[i].label) + 1;
1670 switch( op[i].type ){
1676 len += 9; /* length of "<integer>" */
1680 len += 6; /* length of "<real>" */
1684 len += 8; /* length of "<string>" */
1687 if( len>max ) max = len;
1689 for(i=0; op[i].label; i++){
1690 switch( op[i].type ){
1693 fprintf(errstream," -%-*s %s\n",max,op[i].label,op[i].message);
1697 fprintf(errstream," %s=<integer>%*s %s\n",op[i].label,
1698 (int)(max-strlen(op[i].label)-9),"",op[i].message);
1702 fprintf(errstream," %s=<real>%*s %s\n",op[i].label,
1703 (int)(max-strlen(op[i].label)-6),"",op[i].message);
1707 fprintf(errstream," %s=<string>%*s %s\n",op[i].label,
1708 (int)(max-strlen(op[i].label)-8),"",op[i].message);
1713 /*********************** From the file "parse.c" ****************************/
1715 ** Input file parser for the LEMON parser generator.
1718 /* The state of the parser */
1720 char *filename; /* Name of the input file */
1721 int tokenlineno; /* Linenumber at which current token starts */
1722 int errorcnt; /* Number of errors so far */
1723 char *tokenstart; /* Text of current token */
1724 struct lemon *gp; /* Global state vector */
1727 WAITING_FOR_DECL_OR_RULE,
1728 WAITING_FOR_DECL_KEYWORD,
1729 WAITING_FOR_DECL_ARG,
1730 WAITING_FOR_PRECEDENCE_SYMBOL,
1740 RESYNC_AFTER_RULE_ERROR,
1741 RESYNC_AFTER_DECL_ERROR,
1742 WAITING_FOR_DESTRUCTOR_SYMBOL,
1743 WAITING_FOR_DATATYPE_SYMBOL
1744 } state; /* The state of the parser */
1745 struct symbol *lhs; /* Left-hand side of current rule */
1746 char *lhsalias; /* Alias for the LHS */
1747 int nrhs; /* Number of right-hand side symbols seen */
1748 struct symbol *rhs[MAXRHS]; /* RHS symbols */
1749 char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */
1750 struct rule *prevrule; /* Previous rule parsed */
1751 char *declkeyword; /* Keyword of a declaration */
1752 char **declargslot; /* Where the declaration argument should be put */
1753 int *decllnslot; /* Where the declaration linenumber is put */
1754 enum e_assoc declassoc; /* Assign this association to decl arguments */
1755 int preccounter; /* Assign this precedence to decl arguments */
1756 struct rule *firstrule; /* Pointer to first rule in the grammar */
1757 struct rule *lastrule; /* Pointer to the most recently parsed rule */
1760 /* Parse a single token */
1761 static void parseonetoken(struct pstate *psp)
1764 x = Strsafe(psp->tokenstart); /* Save the token permanently */
1766 printf("%s:%d: Token=[%s] state=%d\n",psp->filename,psp->tokenlineno,
1769 switch( psp->state ){
1772 psp->preccounter = 0;
1773 psp->firstrule = psp->lastrule = 0;
1775 /* Fall thru to next case */
1776 case WAITING_FOR_DECL_OR_RULE:
1778 psp->state = WAITING_FOR_DECL_KEYWORD;
1779 }else if( safe_islower(x[0]) ){
1780 psp->lhs = Symbol_new(x);
1783 psp->state = WAITING_FOR_ARROW;
1784 }else if( x[0]=='{' ){
1785 if( psp->prevrule==0 ){
1786 ErrorMsg(psp->filename,psp->tokenlineno,
1787 "There is not prior rule opon which to attach the code \
1788 fragment which begins on this line.");
1790 }else if( psp->prevrule->code!=0 ){
1791 ErrorMsg(psp->filename,psp->tokenlineno,
1792 "Code fragment beginning on this line is not the first \
1793 to follow the previous rule.");
1796 psp->prevrule->line = psp->tokenlineno;
1797 psp->prevrule->code = &x[1];
1799 }else if( x[0]=='[' ){
1800 psp->state = PRECEDENCE_MARK_1;
1802 ErrorMsg(psp->filename,psp->tokenlineno,
1803 "Token \"%s\" should be either \"%%\" or a nonterminal name.",
1808 case PRECEDENCE_MARK_1:
1809 if( !safe_isupper(x[0]) ){
1810 ErrorMsg(psp->filename,psp->tokenlineno,
1811 "The precedence symbol must be a terminal.");
1813 }else if( psp->prevrule==0 ){
1814 ErrorMsg(psp->filename,psp->tokenlineno,
1815 "There is no prior rule to assign precedence \"[%s]\".",x);
1817 }else if( psp->prevrule->precsym!=0 ){
1818 ErrorMsg(psp->filename,psp->tokenlineno,
1819 "Precedence mark on this line is not the first \
1820 to follow the previous rule.");
1823 psp->prevrule->precsym = Symbol_new(x);
1825 psp->state = PRECEDENCE_MARK_2;
1827 case PRECEDENCE_MARK_2:
1829 ErrorMsg(psp->filename,psp->tokenlineno,
1830 "Missing \"]\" on precedence mark.");
1833 psp->state = WAITING_FOR_DECL_OR_RULE;
1835 case WAITING_FOR_ARROW:
1836 if( x[0]==':' && x[1]==':' && x[2]=='=' ){
1837 psp->state = IN_RHS;
1838 }else if( x[0]=='(' ){
1839 psp->state = LHS_ALIAS_1;
1841 ErrorMsg(psp->filename,psp->tokenlineno,
1842 "Expected to see a \":\" following the LHS symbol \"%s\".",
1845 psp->state = RESYNC_AFTER_RULE_ERROR;
1849 if( safe_isalpha(x[0]) ){
1851 psp->state = LHS_ALIAS_2;
1853 ErrorMsg(psp->filename,psp->tokenlineno,
1854 "\"%s\" is not a valid alias for the LHS \"%s\"\n",
1857 psp->state = RESYNC_AFTER_RULE_ERROR;
1862 psp->state = LHS_ALIAS_3;
1864 ErrorMsg(psp->filename,psp->tokenlineno,
1865 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
1867 psp->state = RESYNC_AFTER_RULE_ERROR;
1871 if( x[0]==':' && x[1]==':' && x[2]=='=' ){
1872 psp->state = IN_RHS;
1874 ErrorMsg(psp->filename,psp->tokenlineno,
1875 "Missing \"->\" following: \"%s(%s)\".",
1876 psp->lhs->name,psp->lhsalias);
1878 psp->state = RESYNC_AFTER_RULE_ERROR;
1884 rp = (struct rule *)malloc( sizeof(struct rule) +
1885 sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs );
1887 ErrorMsg(psp->filename,psp->tokenlineno,
1888 "Can't allocate enough memory for this rule.");
1893 rp->ruleline = psp->tokenlineno;
1894 rp->rhs = (struct symbol**)&rp[1];
1895 rp->rhsalias = (char**)&(rp->rhs[psp->nrhs]);
1896 for(i=0; i<psp->nrhs; i++){
1897 rp->rhs[i] = psp->rhs[i];
1898 rp->rhsalias[i] = psp->alias[i];
1901 rp->lhsalias = psp->lhsalias;
1902 rp->nrhs = psp->nrhs;
1905 rp->index = psp->gp->nrule++;
1906 rp->nextlhs = rp->lhs->rule;
1909 if( psp->firstrule==0 ){
1910 psp->firstrule = psp->lastrule = rp;
1912 psp->lastrule->next = rp;
1917 psp->state = WAITING_FOR_DECL_OR_RULE;
1918 }else if( safe_isalpha(x[0]) ){
1919 if( psp->nrhs>=MAXRHS ){
1920 ErrorMsg(psp->filename,psp->tokenlineno,
1921 "Too many symbol on RHS or rule beginning at \"%s\".",
1924 psp->state = RESYNC_AFTER_RULE_ERROR;
1926 psp->rhs[psp->nrhs] = Symbol_new(x);
1927 psp->alias[psp->nrhs] = 0;
1930 }else if( x[0]=='(' && psp->nrhs>0 ){
1931 psp->state = RHS_ALIAS_1;
1933 ErrorMsg(psp->filename,psp->tokenlineno,
1934 "Illegal character on RHS of rule: \"%s\".",x);
1936 psp->state = RESYNC_AFTER_RULE_ERROR;
1940 if( safe_isalpha(x[0]) ){
1941 psp->alias[psp->nrhs-1] = x;
1942 psp->state = RHS_ALIAS_2;
1944 ErrorMsg(psp->filename,psp->tokenlineno,
1945 "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n",
1946 x,psp->rhs[psp->nrhs-1]->name);
1948 psp->state = RESYNC_AFTER_RULE_ERROR;
1953 psp->state = IN_RHS;
1955 ErrorMsg(psp->filename,psp->tokenlineno,
1956 "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias);
1958 psp->state = RESYNC_AFTER_RULE_ERROR;
1961 case WAITING_FOR_DECL_KEYWORD:
1962 if( safe_isalpha(x[0]) ){
1963 psp->declkeyword = x;
1964 psp->declargslot = 0;
1965 psp->decllnslot = 0;
1966 psp->state = WAITING_FOR_DECL_ARG;
1967 if( strcmp(x,"name")==0 ){
1968 psp->declargslot = &(psp->gp->name);
1969 }else if( strcmp(x,"include")==0 ){
1970 psp->declargslot = &(psp->gp->include);
1971 psp->decllnslot = &psp->gp->includeln;
1972 }else if( strcmp(x,"code")==0 ){
1973 psp->declargslot = &(psp->gp->extracode);
1974 psp->decllnslot = &psp->gp->extracodeln;
1975 }else if( strcmp(x,"token_destructor")==0 ){
1976 psp->declargslot = &psp->gp->tokendest;
1977 psp->decllnslot = &psp->gp->tokendestln;
1978 }else if( strcmp(x,"token_prefix")==0 ){
1979 psp->declargslot = &psp->gp->tokenprefix;
1980 }else if( strcmp(x,"syntax_error")==0 ){
1981 psp->declargslot = &(psp->gp->error);
1982 psp->decllnslot = &psp->gp->errorln;
1983 }else if( strcmp(x,"parse_accept")==0 ){
1984 psp->declargslot = &(psp->gp->accept);
1985 psp->decllnslot = &psp->gp->acceptln;
1986 }else if( strcmp(x,"parse_failure")==0 ){
1987 psp->declargslot = &(psp->gp->failure);
1988 psp->decllnslot = &psp->gp->failureln;
1989 }else if( strcmp(x,"stack_overflow")==0 ){
1990 psp->declargslot = &(psp->gp->overflow);
1991 psp->decllnslot = &psp->gp->overflowln;
1992 }else if( strcmp(x,"extra_argument")==0 ){
1993 psp->declargslot = &(psp->gp->arg);
1994 }else if( strcmp(x,"token_type")==0 ){
1995 psp->declargslot = &(psp->gp->tokentype);
1996 }else if( strcmp(x,"stack_size")==0 ){
1997 psp->declargslot = &(psp->gp->stacksize);
1998 }else if( strcmp(x,"start_symbol")==0 ){
1999 psp->declargslot = &(psp->gp->start);
2000 }else if( strcmp(x,"left")==0 ){
2002 psp->declassoc = LEFT;
2003 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2004 }else if( strcmp(x,"right")==0 ){
2006 psp->declassoc = RIGHT;
2007 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2008 }else if( strcmp(x,"nonassoc")==0 ){
2010 psp->declassoc = NONE;
2011 psp->state = WAITING_FOR_PRECEDENCE_SYMBOL;
2012 }else if( strcmp(x,"destructor")==0 ){
2013 psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL;
2014 }else if( strcmp(x,"type")==0 ){
2015 psp->state = WAITING_FOR_DATATYPE_SYMBOL;
2017 ErrorMsg(psp->filename,psp->tokenlineno,
2018 "Unknown declaration keyword: \"%%%s\".",x);
2020 psp->state = RESYNC_AFTER_DECL_ERROR;
2023 ErrorMsg(psp->filename,psp->tokenlineno,
2024 "Illegal declaration keyword: \"%s\".",x);
2026 psp->state = RESYNC_AFTER_DECL_ERROR;
2029 case WAITING_FOR_DESTRUCTOR_SYMBOL:
2030 if( !safe_isalpha(x[0]) ){
2031 ErrorMsg(psp->filename,psp->tokenlineno,
2032 "Symbol name missing after %%destructor keyword");
2034 psp->state = RESYNC_AFTER_DECL_ERROR;
2036 struct symbol *sp = Symbol_new(x);
2037 psp->declargslot = &sp->destructor;
2038 psp->decllnslot = &sp->destructorln;
2039 psp->state = WAITING_FOR_DECL_ARG;
2042 case WAITING_FOR_DATATYPE_SYMBOL:
2043 if( !safe_isalpha(x[0]) ){
2044 ErrorMsg(psp->filename,psp->tokenlineno,
2045 "Symbol name missing after %%destructor keyword");
2047 psp->state = RESYNC_AFTER_DECL_ERROR;
2049 struct symbol *sp = Symbol_new(x);
2050 psp->declargslot = &sp->datatype;
2051 psp->decllnslot = 0;
2052 psp->state = WAITING_FOR_DECL_ARG;
2055 case WAITING_FOR_PRECEDENCE_SYMBOL:
2057 psp->state = WAITING_FOR_DECL_OR_RULE;
2058 }else if( safe_isupper(x[0]) ){
2062 ErrorMsg(psp->filename,psp->tokenlineno,
2063 "Symbol \"%s\" has already be given a precedence.",x);
2066 sp->prec = psp->preccounter;
2067 sp->assoc = psp->declassoc;
2070 ErrorMsg(psp->filename,psp->tokenlineno,
2071 "Can't assign a precedence to \"%s\".",x);
2075 case WAITING_FOR_DECL_ARG:
2076 if( (x[0]=='{' || x[0]=='\"' || safe_isalnum(x[0])) ){
2077 if( *(psp->declargslot)!=0 ){
2078 ErrorMsg(psp->filename,psp->tokenlineno,
2079 "The argument \"%s\" to declaration \"%%%s\" is not the first.",
2080 x[0]=='\"' ? &x[1] : x,psp->declkeyword);
2082 psp->state = RESYNC_AFTER_DECL_ERROR;
2084 *(psp->declargslot) = (x[0]=='\"' || x[0]=='{') ? &x[1] : x;
2085 if( psp->decllnslot ) *psp->decllnslot = psp->tokenlineno;
2086 psp->state = WAITING_FOR_DECL_OR_RULE;
2089 ErrorMsg(psp->filename,psp->tokenlineno,
2090 "Illegal argument to %%%s: %s",psp->declkeyword,x);
2092 psp->state = RESYNC_AFTER_DECL_ERROR;
2095 case RESYNC_AFTER_RULE_ERROR:
2096 /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2098 case RESYNC_AFTER_DECL_ERROR:
2099 if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
2100 if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD;
2105 /* In spite of its name, this function is really a scanner. It read
2106 ** in the entire input file (all at once) then tokenizes it. Each
2107 ** token is passed to the function "parseonetoken" which builds all
2108 ** the appropriate data structures in the global state vector "gp".
2110 void Parse(struct lemon *gp)
2122 ps.filename = gp->filename;
2124 ps.state = INITIALIZE;
2126 /* Begin by reading the input file */
2127 fp = fopen(ps.filename,"rb");
2129 ErrorMsg(ps.filename,0,"Can't open this file for reading.");
2134 filesize = ftell(fp);
2136 /* XXX - what if filesize is bigger than the maximum size_t value? */
2137 filebuf = (char *)malloc( filesize+1 );
2139 ErrorMsg(ps.filename,0,"Can't allocate %ld of memory to hold this file.",
2144 if( fread(filebuf,1,filesize,fp)!=(size_t)filesize ){
2145 ErrorMsg(ps.filename,0,"Can't read in all %ld bytes of this file.",
2152 filebuf[filesize] = 0;
2154 /* Now scan the text of the input file */
2156 for(cp=filebuf; (c= *cp)!=0; ){
2157 if( c=='\n' ) lineno++; /* Keep track of the line number */
2158 if( safe_isspace(c) ){ cp++; continue; } /* Skip all white space */
2159 if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */
2161 while( (c= *cp)!=0 && c!='\n' ) cp++;
2164 if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */
2166 while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){
2167 if( c=='\n' ) lineno++;
2173 ps.tokenstart = cp; /* Mark the beginning of the token */
2174 ps.tokenlineno = lineno; /* Linenumber on which token begins */
2175 if( c=='\"' ){ /* String literals */
2177 while( (c= *cp)!=0 && c!='\"' ){
2178 if( c=='\n' ) lineno++;
2182 ErrorMsg(ps.filename,startline,
2183 "String starting on this line is not terminated before the end of the file.");
2189 }else if( c=='{' ){ /* A block of C code */
2192 for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){
2193 if( c=='\n' ) lineno++;
2194 else if( c=='{' ) level++;
2195 else if( c=='}' ) level--;
2196 else if( c=='/' && cp[1]=='*' ){ /* Skip comments */
2200 while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){
2201 if( c=='\n' ) lineno++;
2205 }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */
2207 while( (c= *cp)!=0 && c!='\n' ) cp++;
2209 }else if( c=='\'' || c=='\"' ){ /* String a character literals */
2210 char startchar, prevc;
2213 for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){
2214 if( c=='\n' ) lineno++;
2215 if( prevc=='\\' ) prevc = 0;
2221 ErrorMsg(ps.filename,startline,
2222 "C code starting on this line is not terminated before the end of the file.");
2228 }else if( safe_isalnum(c) ){ /* Identifiers */
2229 while( (c= *cp)!=0 && (safe_isalnum(c) || c=='_') ) cp++;
2231 }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */
2234 }else{ /* All other (one character) operators */
2239 *cp = 0; /* Null terminate the token */
2240 parseonetoken(&ps); /* Parse the token */
2241 *cp = c; /* Restore the buffer */
2244 free(filebuf); /* Release the buffer after parsing */
2245 gp->rule = ps.firstrule;
2246 gp->errorcnt = ps.errorcnt;
2248 /*************************** From the file "plink.c" *********************/
2250 ** Routines processing configuration follow-set propagation links
2251 ** in the LEMON parser generator.
2253 static struct plink *plink_freelist = 0;
2255 /* Allocate a new plink */
2256 struct plink *Plink_new(void){
2259 if( plink_freelist==0 ){
2262 plink_freelist = (struct plink *)malloc( sizeof(struct plink)*amt );
2263 if( plink_freelist==0 ){
2265 "Unable to allocate memory for a new follow-set propagation link.\n");
2268 for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1];
2269 plink_freelist[amt-1].next = 0;
2271 new = plink_freelist;
2272 plink_freelist = plink_freelist->next;
2276 /* Add a plink to a plink list */
2277 void Plink_add(struct plink **plpp, struct config *cfp)
2286 /* Transfer every plink on the list "from" to the list "to" */
2287 void Plink_copy(struct plink **to, struct plink *from)
2289 struct plink *nextpl;
2291 nextpl = from->next;
2298 /* Delete every plink on the list */
2299 void Plink_delete(struct plink *plp)
2301 struct plink *nextpl;
2305 plp->next = plink_freelist;
2306 plink_freelist = plp;
2310 /*********************** From the file "report.c" **************************/
2312 ** Procedures for generating reports and tables in the LEMON parser generator.
2315 /* Generate a filename with the given suffix. Space to hold the
2316 ** name comes from malloc() and must be freed by the calling
2319 PRIVATE char *file_makename(char *pattern, char *suffix)
2324 name = malloc( strlen(pattern) + strlen(suffix) + 5 );
2326 fprintf(stderr,"Can't allocate space for a filename.\n");
2329 strcpy(name,pattern);
2330 cp = strrchr(name,'.');
2332 strcat(name,suffix);
2336 /* Generate a filename with the given suffix. Uses only
2337 ** the basename of the input file, not the entire path. This
2338 ** is useful for creating output files when using outdirname.
2339 ** Space to hold this name comes from malloc() and must be
2340 ** freed by the calling function.
2342 PRIVATE char *file_makename_using_basename(struct lemon *lemp, char *suffix)
2344 return file_makename(lemp->basename, suffix);
2347 /* Open a file with a name based on the name of the input file,
2348 ** but with a different (specified) suffix, and return a pointer
2349 ** to the stream. Prepend outdirname for both reads and writes, because
2350 ** the only time we read is when checking for an already-produced
2351 ** header file, which should exist in the output directory, not the
2352 ** input directory. If we ever need to file_open(,,"r") on the input
2353 ** side, we should add another arg to file_open() indicating which
2354 ** directory, ("input, "output", or "other") we should deal with.
2356 PRIVATE FILE *file_open(struct lemon *lemp, char *suffix, char *mode)
2361 if( lemp->outname ) free(lemp->outname);
2362 name = file_makename_using_basename(lemp, suffix);
2364 if ( lemp->outdirname != NULL ) {
2365 lemp->outname = malloc( strlen(lemp->outdirname) + strlen(name) + 2);
2366 if ( lemp->outname == 0 ) {
2367 fprintf(stderr, "Can't allocate space for dir/filename");
2370 strcpy(lemp->outname, lemp->outdirname);
2372 strcat(lemp->outname, "\\");
2374 strcat(lemp->outname, "/");
2376 strcat(lemp->outname, name);
2380 lemp->outname = name;
2383 fp = fopen(lemp->outname,mode);
2384 if( fp==0 && *mode=='w' ){
2385 fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname);
2392 /* Duplicate the input file without comments and without actions
2394 void Reprint(struct lemon *lemp)
2398 int i, j, maxlen, len, ncolumns, skip;
2399 printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename);
2401 for(i=0; i<lemp->nsymbol; i++){
2402 sp = lemp->symbols[i];
2403 len = strlen(sp->name);
2404 if( len>maxlen ) maxlen = len;
2406 ncolumns = 76/(maxlen+5);
2407 if( ncolumns<1 ) ncolumns = 1;
2408 skip = (lemp->nsymbol + ncolumns - 1)/ncolumns;
2409 for(i=0; i<skip; i++){
2411 for(j=i; j<lemp->nsymbol; j+=skip){
2412 sp = lemp->symbols[j];
2413 assert( sp->index==j );
2414 printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name);
2418 for(rp=lemp->rule; rp; rp=rp->next){
2419 printf("%s",rp->lhs->name);
2420 /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
2422 for(i=0; i<rp->nrhs; i++){
2423 printf(" %s",rp->rhs[i]->name);
2424 /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
2427 if( rp->precsym ) printf(" [%s]",rp->precsym->name);
2428 /* if( rp->code ) printf("\n %s",rp->code); */
2433 void ConfigPrint(FILE *fp, struct config *cfp)
2438 fprintf(fp,"%s ::=",rp->lhs->name);
2439 for(i=0; i<=rp->nrhs; i++){
2440 if( i==cfp->dot ) fprintf(fp," *");
2441 if( i==rp->nrhs ) break;
2442 fprintf(fp," %s",rp->rhs[i]->name);
2449 PRIVATE void SetPrint(FILE *out, char *set, struct lemon *lemp)
2454 fprintf(out,"%12s[","");
2455 for(i=0; i<lemp->nterminal; i++){
2456 if( SetFind(set,i) ){
2457 fprintf(out,"%s%s",spacer,lemp->symbols[i]->name);
2464 /* Print a plink chain */
2465 PRIVATE void PlinkPrint(FILE *out, struct plink *plp, char *tag)
2468 fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->index);
2469 ConfigPrint(out,plp->cfp);
2476 /* Print an action to the given file descriptor. Return FALSE if
2477 ** nothing was actually printed.
2479 int PrintAction(struct action *ap, FILE *fp, int indent){
2483 fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->index);
2486 fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index);
2489 fprintf(fp,"%*s accept",indent,ap->sp->name);
2492 fprintf(fp,"%*s error",indent,ap->sp->name);
2495 fprintf(fp,"%*s reduce %-3d ** Parsing conflict **",
2496 indent,ap->sp->name,ap->x.rp->index);
2507 /* Generate the "y.output" log file */
2508 void ReportOutput(struct lemon *lemp)
2516 fp = file_open(lemp,".out","w");
2519 for(i=0; i<lemp->nstate; i++){
2520 stp = lemp->sorted[i];
2521 fprintf(fp,"State %d:\n",stp->index);
2522 if( lemp->basisflag ) cfp=stp->bp;
2526 if( cfp->dot==cfp->rp->nrhs ){
2527 sprintf(buf,"(%d)",cfp->rp->index);
2528 fprintf(fp," %5s ",buf);
2532 ConfigPrint(fp,cfp);
2535 SetPrint(fp,cfp->fws,lemp);
2536 PlinkPrint(fp,cfp->fplp,"To ");
2537 PlinkPrint(fp,cfp->bplp,"From");
2539 if( lemp->basisflag ) cfp=cfp->bp;
2543 for(ap=stp->ap; ap; ap=ap->next){
2544 if( PrintAction(ap,fp,30) ) fprintf(fp,"\n");
2552 /* Search for the file "name" which is in the same directory as
2553 ** the exacutable */
2554 PRIVATE char *pathsearch(char *argv0, char *name, int modemask)
2561 cp = strrchr(argv0,'\\');
2563 cp = strrchr(argv0,'/');
2568 path = (char *)malloc( strlen(argv0) + strlen(name) + 2 );
2569 if( path ) sprintf(path,"%s/%s",argv0,name);
2572 extern char *getenv();
2573 pathlist = getenv("PATH");
2574 if( pathlist==0 ) pathlist = ".:/bin:/usr/bin";
2575 path = (char *)malloc( strlen(pathlist)+strlen(name)+2 );
2578 cp = strchr(pathlist,':');
2579 if( cp==0 ) cp = &pathlist[strlen(pathlist)];
2582 sprintf(path,"%s/%s",pathlist,name);
2584 if( c==0 ) pathlist = "";
2585 else pathlist = &cp[1];
2586 if( access(path,modemask)==0 ) break;
2593 /* Given an action, compute the integer value for that action
2594 ** which is to be put in the action table of the generated machine.
2595 ** Return negative if no action should be generated.
2597 PRIVATE int compute_action(struct lemon *lemp, struct action *ap)
2601 case SHIFT: act = ap->x.stp->index; break;
2602 case REDUCE: act = ap->x.rp->index + lemp->nstate; break;
2603 case ERROR: act = lemp->nstate + lemp->nrule; break;
2604 case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break;
2605 default: act = -1; break;
2610 #define LINESIZE 1000
2611 /* The next cluster of routines are for reading the template file
2612 ** and writing the results to the generated parser */
2613 /* The first function transfers data from "in" to "out" until
2614 ** a line is seen which begins with "%%". The line number is
2617 ** if name!=0, then any word that begin with "Parse" is changed to
2618 ** begin with *name instead.
2620 PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno)
2623 char line[LINESIZE];
2624 while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){
2628 for(i=0; line[i]; i++){
2629 if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0
2630 && (i==0 || !safe_isalpha(line[i-1]))
2632 if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]);
2633 fprintf(out,"%s",name);
2639 fprintf(out,"%s",&line[iStart]);
2643 /* The next function finds the template file and opens it, returning
2644 ** a pointer to the opened file. */
2645 PRIVATE FILE *tplt_open(struct lemon *lemp)
2647 static char templatename[] = "lempar.c";
2653 if (lemp->templatename) {
2654 tpltname = lemp->templatename;
2657 cp = strrchr(lemp->filename,'.');
2659 sprintf(buf,"%.*s.lt",(int)(cp - lemp->filename),lemp->filename);
2661 sprintf(buf,"%s.lt",lemp->filename);
2663 if( access(buf,004)==0 ){
2666 tpltname = pathsearch(lemp->argv0,templatename,0);
2670 fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
2675 in = fopen(tpltname,"r");
2677 fprintf(stderr,"Can't open the template file \"%s\".\n",templatename);
2684 /* Print a string to the file and keep the linenumber up to date */
2685 PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str,
2686 int strln, int *lineno)
2688 if( str==0 ) return;
2689 fprintf(out,"#line %d \"%s\"\n",strln,lemp->filename); (*lineno)++;
2691 if( *str=='\n' ) (*lineno)++;
2695 fprintf(out,"\n#line %d \"%s\"\n",*lineno+2,lemp->outname); (*lineno)+=2;
2700 ** The following routine emits code for the destructor for the
2703 void emit_destructor_code(FILE *out, struct symbol *sp, struct lemon *lemp,
2709 if( sp->type==TERMINAL ){
2710 cp = lemp->tokendest;
2712 fprintf(out,"#line %d \"%s\"\n{",lemp->tokendestln,lemp->filename);
2714 cp = sp->destructor;
2716 fprintf(out,"#line %d \"%s\"\n{",sp->destructorln,lemp->filename);
2719 if( *cp=='$' && cp[1]=='$' ){
2720 fprintf(out,"(yypminor->yy%d)",sp->dtnum);
2724 if( *cp=='\n' ) linecnt++;
2727 (*lineno) += 3 + linecnt;
2728 fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname);
2733 ** Return TRUE (non-zero) if the given symbol has a distructor.
2735 int has_destructor(struct symbol *sp, struct lemon *lemp)
2738 if( sp->type==TERMINAL ){
2739 ret = lemp->tokendest!=0;
2741 ret = sp->destructor!=0;
2747 ** Generate code which executes when the rule "rp" is reduced. Write
2748 ** the code to "out". Make sure lineno stays up-to-date.
2750 PRIVATE void emit_code(FILE *out, struct rule *rp, struct lemon *lemp,
2756 char lhsused = 0; /* True if the LHS element has been used */
2757 char used[MAXRHS]; /* True for each RHS element which is used */
2759 for(i=0; i<rp->nrhs; i++) used[i] = 0;
2762 /* Generate code to do the reduce action */
2764 fprintf(out,"#line %d \"%s\"\n{",rp->line,lemp->filename);
2765 for(cp=rp->code; *cp; cp++){
2766 if( safe_isalpha(*cp) && (cp==rp->code || !safe_isalnum(cp[-1])) ){
2768 for(xp= &cp[1]; safe_isalnum(*xp); xp++);
2771 if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){
2772 fprintf(out,"yygotominor.yy%d",rp->lhs->dtnum);
2776 for(i=0; i<rp->nrhs; i++){
2777 if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){
2778 fprintf(out,"yymsp[%d].minor.yy%d",i-rp->nrhs+1,rp->rhs[i]->dtnum);
2787 if( *cp=='\n' ) linecnt++;
2790 (*lineno) += 3 + linecnt;
2791 fprintf(out,"}\n#line %d \"%s\"\n",*lineno,lemp->outname);
2792 } /* End if( rp->code ) */
2794 /* Check to make sure the LHS has been used */
2795 if( rp->lhsalias && !lhsused ){
2796 ErrorMsg(lemp->filename,rp->ruleline,
2797 "Label \"%s\" for \"%s(%s)\" is never used.",
2798 rp->lhsalias,rp->lhs->name,rp->lhsalias);
2802 /* Generate destructor code for RHS symbols which are not used in the
2804 for(i=0; i<rp->nrhs; i++){
2805 if( rp->rhsalias[i] && !used[i] ){
2806 ErrorMsg(lemp->filename,rp->ruleline,
2807 "Label $%s$ for \"%s(%s)\" is never used.",
2808 rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]);
2810 }else if( rp->rhsalias[i]==0 ){
2811 if( has_destructor(rp->rhs[i],lemp) ){
2812 fprintf(out," yy_destructor(%d,&yymsp[%d].minor);\n",
2813 rp->rhs[i]->index,i-rp->nrhs+1); (*lineno)++;
2815 fprintf(out," /* No destructor defined for %s */\n",
2825 ** Print the definition of the union used for the parser's data stack.
2826 ** This union contains fields for every possible data type for tokens
2827 ** and nonterminals. In the process of computing and printing this
2828 ** union, also set the ".dtnum" field of every terminal and nonterminal
2831 void print_stack_union(
2832 FILE *out, /* The output stream */
2833 struct lemon *lemp, /* The main info structure for this parser */
2834 int *plineno, /* Pointer to the line number */
2835 int mhflag) /* True if generating makeheaders output */
2837 int lineno = *plineno; /* The line number of the output */
2838 char **types; /* A hash table of datatypes */
2839 int arraysize; /* Size of the "types" array */
2840 int maxdtlength; /* Maximum length of any ".datatype" field. */
2841 char *stddt; /* Standardized name for a datatype */
2842 int i,j; /* Loop counters */
2843 int hash; /* For hashing the name of a type */
2844 char *name; /* Name of the parser */
2846 /* Allocate and initialize types[] and allocate stddt[] */
2847 arraysize = lemp->nsymbol * 2;
2848 types = (char**)malloc( arraysize * sizeof(char*) );
2849 for(i=0; i<arraysize; i++) types[i] = 0;
2851 for(i=0; i<lemp->nsymbol; i++){
2853 struct symbol *sp = lemp->symbols[i];
2854 if( sp->datatype==0 ) continue;
2855 len = strlen(sp->datatype);
2856 if( len>maxdtlength ) maxdtlength = len;
2858 stddt = (char*)malloc( maxdtlength*2 + 1 );
2859 if( types==0 || stddt==0 ){
2860 fprintf(stderr,"Out of memory.\n");
2864 /* Build a hash table of datatypes. The ".dtnum" field of each symbol
2865 ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is
2866 ** used for terminal symbols and for nonterminals which don't specify
2867 ** a datatype using the %type directive. */
2868 for(i=0; i<lemp->nsymbol; i++){
2869 struct symbol *sp = lemp->symbols[i];
2871 if( sp==lemp->errsym ){
2872 sp->dtnum = arraysize+1;
2875 if( sp->type!=NONTERMINAL || sp->datatype==0 ){
2881 while( safe_isspace(*cp) ) cp++;
2882 while( *cp ) stddt[j++] = *cp++;
2883 while( j>0 && safe_isspace(stddt[j-1]) ) j--;
2886 for(j=0; stddt[j]; j++){
2887 hash = hash*53 + stddt[j];
2889 if( hash<0 ) hash = -hash;
2890 hash = hash%arraysize;
2891 while( types[hash] ){
2892 if( strcmp(types[hash],stddt)==0 ){
2893 sp->dtnum = hash + 1;
2897 if( hash>=arraysize ) hash = 0;
2899 if( types[hash]==0 ){
2900 sp->dtnum = hash + 1;
2901 types[hash] = (char*)malloc( strlen(stddt)+1 );
2902 if( types[hash]==0 ){
2903 fprintf(stderr,"Out of memory.\n");
2906 strcpy(types[hash],stddt);
2910 /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */
2911 name = lemp->name ? lemp->name : "Parse";
2913 if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; }
2914 fprintf(out,"#define %sTOKENTYPE %s\n",name,
2915 lemp->tokentype?lemp->tokentype:"void*"); lineno++;
2916 if( mhflag ){ fprintf(out,"#endif\n"); lineno++; }
2917 fprintf(out,"typedef union {\n"); lineno++;
2918 fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++;
2919 for(i=0; i<arraysize; i++){
2920 if( types[i]==0 ) continue;
2921 fprintf(out," %s yy%d;\n",types[i],i+1); lineno++;
2924 fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++;
2927 fprintf(out,"} YYMINORTYPE;\n"); lineno++;
2931 /* Generate C source code for the parser */
2934 int mhflag) /* Output in makeheaders format if true */
2937 char line[LINESIZE];
2946 in = tplt_open(lemp);
2948 out = file_open(lemp,".c","w");
2954 tplt_xfer(lemp->name,in,out,&lineno);
2956 /* Generate the include code, if any */
2957 tplt_print(out,lemp,lemp->include,lemp->includeln,&lineno);
2959 char *name = file_makename_using_basename(lemp, ".h");
2960 fprintf(out,"#include \"%s\"\n", name); lineno++;
2963 tplt_xfer(lemp->name,in,out,&lineno);
2965 /* Generate #defines for all tokens */
2968 fprintf(out,"#if INTERFACE\n"); lineno++;
2969 if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
2971 for(i=1; i<lemp->nterminal; i++){
2972 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
2975 fprintf(out,"#endif\n"); lineno++;
2977 tplt_xfer(lemp->name,in,out,&lineno);
2979 /* Generate the defines */
2980 fprintf(out,"/* \001 */\n");
2981 fprintf(out,"#define YYCODETYPE %s\n",
2982 lemp->nsymbol>250?"int":"unsigned char"); lineno++;
2983 fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++;
2984 fprintf(out,"#define YYACTIONTYPE %s\n",
2985 lemp->nstate+lemp->nrule>250?"int":"unsigned char"); lineno++;
2986 print_stack_union(out,lemp,&lineno,mhflag);
2987 if( lemp->stacksize ){
2988 if( atoi(lemp->stacksize)<=0 ){
2989 ErrorMsg(lemp->filename,0,
2990 "Illegal stack size: [%s]. The stack size should be an integer constant.",
2993 lemp->stacksize = "100";
2995 fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++;
2997 fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++;
3000 fprintf(out,"#if INTERFACE\n"); lineno++;
3002 name = lemp->name ? lemp->name : "Parse";
3003 if( lemp->arg && lemp->arg[0] ){
3005 i = strlen(lemp->arg);
3006 while( i>=1 && safe_isspace(lemp->arg[i-1]) ) i--;
3007 while( i>=1 && safe_isalnum(lemp->arg[i-1]) ) i--;
3008 fprintf(out,"#define %sARGDECL ,%s\n",name,&lemp->arg[i]); lineno++;
3009 fprintf(out,"#define %sXARGDECL %s;\n",name,lemp->arg); lineno++;
3010 fprintf(out,"#define %sANSIARGDECL ,%s\n",name,lemp->arg); lineno++;
3012 fprintf(out,"#define %sARGDECL\n",name); lineno++;
3013 fprintf(out,"#define %sXARGDECL\n",name); lineno++;
3014 fprintf(out,"#define %sANSIARGDECL\n",name); lineno++;
3017 fprintf(out,"#endif\n"); lineno++;
3019 fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++;
3020 fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++;
3021 fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++;
3022 fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++;
3023 tplt_xfer(lemp->name,in,out,&lineno);
3025 /* Generate the action table.
3027 ** Each entry in the action table is an element of the following
3029 ** struct yyActionEntry {
3030 ** YYCODETYPE lookahead;
3031 ** YYACTIONTYPE action;
3032 ** struct yyActionEntry *next;
3035 ** The entries are grouped into hash tables, one hash table for each
3036 ** parser state. The hash table has a size which is the smallest
3037 ** power of two needed to hold all entries.
3041 /* Loop over parser states */
3042 for(i=0; i<lemp->nstate; i++){
3043 size_t tablesize; /* size of the hash table */
3044 unsigned int j,k; /* Loop counter */
3045 int collide[2048]; /* The collision chain for the table */
3046 struct action *table[2048]; /* Build the hash table here */
3048 /* Find the number of actions and initialize the hash table */
3049 stp = lemp->sorted[i];
3050 stp->tabstart = tablecnt;
3052 for(ap=stp->ap; ap; ap=ap->next){
3053 if( ap->sp->index!=lemp->nsymbol && compute_action(lemp,ap)>=0 ){
3058 while( tablesize<stp->naction ) tablesize += tablesize;
3059 assert( tablesize<= sizeof(table)/sizeof(table[0]) );
3060 for(j=0; j<tablesize; j++){
3065 /* Hash the actions into the hash table */
3066 stp->tabdfltact = lemp->nstate + lemp->nrule;
3067 for(ap=stp->ap; ap; ap=ap->next){
3068 int action = compute_action(lemp,ap);
3070 if( ap->sp->index==lemp->nsymbol ){
3071 stp->tabdfltact = action;
3072 }else if( action>=0 ){
3073 h = ap->sp->index & (tablesize-1);
3074 ap->collide = table[h];
3079 /* Resolve collisions */
3080 for(j=k=0; j<tablesize; j++){
3081 if( table[j] && table[j]->collide ){
3082 while( table[k] ) k++;
3083 table[k] = table[j]->collide;
3085 table[j]->collide = 0;
3090 /* Print the hash table */
3091 fprintf(out,"/* State %d */\n",stp->index); lineno++;
3092 for(j=0; j<tablesize; j++){
3095 " {YYNOCODE,0,0}, /* Unused */\n");
3097 fprintf(out," {%4d,%4d, ",
3098 table[j]->sp->index,
3099 compute_action(lemp,table[j]));
3100 if( collide[j]>=0 ){
3101 fprintf(out,"&yyActionTable[%4d] }, /* ",
3102 collide[j] + tablecnt);
3104 fprintf(out,"0 }, /* ");
3106 PrintAction(table[j],out,22);
3107 fprintf(out," */\n");
3112 /* Update the table count */
3113 tablecnt += tablesize;
3115 tplt_xfer(lemp->name,in,out,&lineno);
3116 lemp->tablesize = tablecnt;
3118 /* Generate the state table
3120 ** Each entry is an element of the following structure:
3121 ** struct yyStateEntry {
3122 ** struct yyActionEntry *hashtbl;
3124 ** YYACTIONTYPE actionDefault;
3127 for(i=0; i<lemp->nstate; i++){
3129 stp = lemp->sorted[i];
3131 while( tablesize<stp->naction ) tablesize += tablesize;
3132 fprintf(out," { &yyActionTable[%d], %lu, %d},\n",
3134 (unsigned long)tablesize - 1,
3135 stp->tabdfltact); lineno++;
3137 tplt_xfer(lemp->name,in,out,&lineno);
3139 /* Generate a table containing the symbolic name of every symbol */
3140 for(i=0; i<lemp->nsymbol; i++){
3141 sprintf(line,"\"%s\",",lemp->symbols[i]->name);
3142 fprintf(out," %-15s",line);
3143 if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
3145 if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; }
3146 tplt_xfer(lemp->name,in,out,&lineno);
3148 /* Generate code which executes every time a symbol is popped from
3149 ** the stack while processing errors or while destroying the parser.
3150 ** (In other words, generate the %destructor actions) */
3151 if( lemp->tokendest ){
3152 for(i=0; i<lemp->nsymbol; i++){
3153 struct symbol *sp = lemp->symbols[i];
3154 if( sp==0 || sp->type!=TERMINAL ) continue;
3155 fprintf(out," case %d:\n",sp->index); lineno++;
3157 for(i=0; i<lemp->nsymbol && lemp->symbols[i]->type!=TERMINAL; i++);
3158 if( i<lemp->nsymbol ){
3159 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3160 fprintf(out," break;\n"); lineno++;
3163 for(i=0; i<lemp->nsymbol; i++){
3164 struct symbol *sp = lemp->symbols[i];
3165 if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue;
3166 fprintf(out," case %d:\n",sp->index); lineno++;
3167 emit_destructor_code(out,lemp->symbols[i],lemp,&lineno);
3168 fprintf(out," break;\n"); lineno++;
3170 tplt_xfer(lemp->name,in,out,&lineno);
3172 /* Generate code which executes whenever the parser stack overflows */
3173 tplt_print(out,lemp,lemp->overflow,lemp->overflowln,&lineno);
3174 tplt_xfer(lemp->name,in,out,&lineno);
3176 /* Generate the table of rule information
3178 ** Note: This code depends on the fact that rules are number
3179 ** sequentually beginning with 0.
3181 for(rp=lemp->rule; rp; rp=rp->next){
3182 fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++;
3184 tplt_xfer(lemp->name,in,out,&lineno);
3186 /* Generate code which execution during each REDUCE action */
3187 for(rp=lemp->rule; rp; rp=rp->next){
3188 fprintf(out," case %d:\n",rp->index); lineno++;
3189 fprintf(out," YYTRACE(\"%s ::=",rp->lhs->name);
3190 for(i=0; i<rp->nrhs; i++) fprintf(out," %s",rp->rhs[i]->name);
3191 fprintf(out,"\")\n"); lineno++;
3192 emit_code(out,rp,lemp,&lineno);
3193 fprintf(out," break;\n"); lineno++;
3195 tplt_xfer(lemp->name,in,out,&lineno);
3197 /* Generate code which executes if a parse fails */
3198 tplt_print(out,lemp,lemp->failure,lemp->failureln,&lineno);
3199 tplt_xfer(lemp->name,in,out,&lineno);
3201 /* Generate code which executes when a syntax error occurs */
3202 tplt_print(out,lemp,lemp->error,lemp->errorln,&lineno);
3203 tplt_xfer(lemp->name,in,out,&lineno);
3205 /* Generate code which executes when the parser accepts its input */
3206 tplt_print(out,lemp,lemp->accept,lemp->acceptln,&lineno);
3207 tplt_xfer(lemp->name,in,out,&lineno);
3209 /* Append any addition code the user desires */
3210 tplt_print(out,lemp,lemp->extracode,lemp->extracodeln,&lineno);
3217 /* Generate a header file for the parser */
3218 void ReportHeader(struct lemon *lemp)
3222 char line[LINESIZE];
3223 char pattern[LINESIZE];
3226 if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
3228 in = file_open(lemp,".h","r");
3230 for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
3231 sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3232 if( strcmp(line,pattern) ) break;
3235 if( i==lemp->nterminal ){
3236 /* No change in the file. Don't rewrite it. */
3240 out = file_open(lemp,".h","w");
3242 for(i=1; i<lemp->nterminal; i++){
3243 fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
3250 /* Reduce the size of the action tables, if possible, by making use
3253 ** In this version, if all REDUCE actions use the same rule, make
3254 ** them the default. Only default them if there are more than one.
3256 void CompressTables(struct lemon *lemp)
3264 for(i=0; i<lemp->nstate; i++){
3265 stp = lemp->sorted[i];
3267 /* Find the first REDUCE action */
3268 for(ap=stp->ap; ap && ap->type!=REDUCE; ap=ap->next);
3269 if( ap==0 ) continue;
3271 /* Remember the rule used */
3274 /* See if all other REDUCE acitons use the same rule */
3276 for(ap=ap->next; ap; ap=ap->next){
3277 if( ap->type==REDUCE ){
3278 if( ap->x.rp!=rp ) break;
3282 if( ap || cnt==1 ) continue;
3284 /* Combine all REDUCE actions into a single default */
3285 for(ap=stp->ap; ap && ap->type!=REDUCE; ap=ap->next);
3287 ap->sp = Symbol_new("{default}");
3288 for(ap=ap->next; ap; ap=ap->next){
3289 if( ap->type==REDUCE ) ap->type = NOT_USED;
3291 stp->ap = Action_sort(stp->ap);
3294 /***************** From the file "set.c" ************************************/
3296 ** Set manipulation routines for the LEMON parser generator.
3299 static int size = 0;
3301 /* Set the set size */
3307 /* Allocate a new set */
3311 s = (char*)malloc( size );
3313 extern void memory_error();
3316 for(i=0; i<size; i++) s[i] = 0;
3320 /* Deallocate a set */
3321 void SetFree(char *s)
3326 /* Add a new element to the set. Return TRUE if the element was added
3327 ** and FALSE if it was already there. */
3328 int SetAdd(char *s, int e)
3336 /* Add every element of s2 to s1. Return TRUE if s1 changes. */
3337 int SetUnion(char *s1, char *s2)
3341 for(i=0; i<size; i++){
3342 if( s2[i]==0 ) continue;
3350 /********************** From the file "table.c" ****************************/
3352 ** All code in this file has been automatically generated
3353 ** from a specification in the file
3355 ** by the associative array code building program "aagen".
3356 ** Do not edit this file! Instead, edit the specification
3357 ** file, then rerun aagen.
3360 ** Code for processing tables in the LEMON parser generator.
3363 PRIVATE int strhash(char *x)
3366 while( *x) h = h*13 + *(x++);
3370 /* Works like strdup, sort of. Save a string in malloced memory, but
3371 ** keep strings in a table so that the same string is not in more
3374 char *Strsafe(char *y)
3378 z = Strsafe_find(y);
3379 if( z==0 && (z=malloc( strlen(y)+1 ))!=0 ){
3387 /* There is one instance of the following structure for each
3388 ** associative array of type "x1".
3391 int size; /* The number of available slots. */
3392 /* Must be a power of 2 greater than or */
3394 int count; /* Number of currently slots filled */
3395 struct s_x1node *tbl; /* The data stored here */
3396 struct s_x1node **ht; /* Hash table for lookups */
3399 /* There is one instance of this structure for every data element
3400 ** in an associative array of type "x1".
3402 typedef struct s_x1node {
3403 char *data; /* The data */
3404 struct s_x1node *next; /* Next entry with the same hash */
3405 struct s_x1node **from; /* Previous link */
3408 /* There is only one instance of the array, which is the following */
3409 static struct s_x1 *x1a;
3411 /* Allocate a new associative array */
3412 void Strsafe_init(void){
3414 x1a = (struct s_x1*)malloc( sizeof(struct s_x1) );
3418 x1a->tbl = (x1node*)malloc(
3419 (sizeof(x1node) + sizeof(x1node*))*1024 );
3425 x1a->ht = (x1node**)&(x1a->tbl[1024]);
3426 for(i=0; i<1024; i++) x1a->ht[i] = 0;
3430 /* Insert a new record into the array. Return TRUE if successful.
3431 ** Prior data with the same key is NOT overwritten */
3432 int Strsafe_insert(char *data)
3438 if( x1a==0 ) return 0;
3440 h = ph & (x1a->size-1);
3443 if( strcmp(np->data,data)==0 ){
3444 /* An existing entry with the same key is found. */
3445 /* Fail because overwrite is not allows. */
3450 if( x1a->count>=x1a->size ){
3451 /* Need to make the hash table bigger */
3454 array.size = size = x1a->size*2;
3455 array.count = x1a->count;
3456 array.tbl = (x1node*)malloc(
3457 (sizeof(x1node) + sizeof(x1node*))*size );
3458 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
3459 array.ht = (x1node**)&(array.tbl[size]);
3460 for(i=0; i<size; i++) array.ht[i] = 0;
3461 for(i=0; i<x1a->count; i++){
3462 x1node *oldnp, *newnp;
3463 oldnp = &(x1a->tbl[i]);
3464 h = strhash(oldnp->data) & (size-1);
3465 newnp = &(array.tbl[i]);
3466 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
3467 newnp->next = array.ht[h];
3468 newnp->data = oldnp->data;
3469 newnp->from = &(array.ht[h]);
3470 array.ht[h] = newnp;
3475 /* Insert the new data */
3476 h = ph & (x1a->size-1);
3477 np = &(x1a->tbl[x1a->count++]);
3479 if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next);
3480 np->next = x1a->ht[h];
3482 np->from = &(x1a->ht[h]);
3486 /* Return a pointer to data assigned to the given key. Return NULL
3487 ** if no such key. */
3488 char *Strsafe_find(char *key)
3493 if( x1a==0 ) return 0;
3494 h = strhash(key) & (x1a->size-1);
3497 if( strcmp(np->data,key)==0 ) break;
3500 return np ? np->data : 0;
3503 /* Return a pointer to the (terminal or nonterminal) symbol "x".
3504 ** Create a new symbol if this is the first time "x" has been seen.
3506 struct symbol *Symbol_new(char *x)
3510 sp = Symbol_find(x);
3512 sp = (struct symbol *)malloc( sizeof(struct symbol) );
3514 sp->name = Strsafe(x);
3515 sp->type = safe_isupper(*x) ? TERMINAL : NONTERMINAL;
3520 sp->lambda = BOOL_FALSE;
3523 Symbol_insert(sp,sp->name);
3528 /* Compare two symbols */
3529 int Symbolcmpp(const void *a_arg, const void *b_arg)
3531 /* MSVC complains about this, but it's wrong. GCC does not
3532 complain about this, as is right. From Guy Harris:
3534 At least as I read the ANSI C spec, GCC is right and MSVC is wrong here.
3535 The arguments are pointers to "const void", and should be cast to
3536 pointers to "const struct symbol *"; however, at least as I read the
3537 spec, "const struct symbol **" is "pointer to pointer to const struct
3538 symbol", not "pointer to const pointer to struct symbol".
3541 struct symbol *const *a = a_arg;
3542 struct symbol *const *b = b_arg;
3544 return strcmp((**a).name,(**b).name);
3547 /* There is one instance of the following structure for each
3548 ** associative array of type "x2".
3551 int size; /* The number of available slots. */
3552 /* Must be a power of 2 greater than or */
3554 int count; /* Number of currently slots filled */
3555 struct s_x2node *tbl; /* The data stored here */
3556 struct s_x2node **ht; /* Hash table for lookups */
3559 /* There is one instance of this structure for every data element
3560 ** in an associative array of type "x2".
3562 typedef struct s_x2node {
3563 struct symbol *data; /* The data */
3564 char *key; /* The key */
3565 struct s_x2node *next; /* Next entry with the same hash */
3566 struct s_x2node **from; /* Previous link */
3569 /* There is only one instance of the array, which is the following */
3570 static struct s_x2 *x2a;
3572 /* Allocate a new associative array */
3573 void Symbol_init(void){
3575 x2a = (struct s_x2*)malloc( sizeof(struct s_x2) );
3579 x2a->tbl = (x2node*)malloc(
3580 (sizeof(x2node) + sizeof(x2node*))*128 );
3586 x2a->ht = (x2node**)&(x2a->tbl[128]);
3587 for(i=0; i<128; i++) x2a->ht[i] = 0;
3591 /* Insert a new record into the array. Return TRUE if successful.
3592 ** Prior data with the same key is NOT overwritten */
3593 int Symbol_insert(struct symbol *data, char *key)
3599 if( x2a==0 ) return 0;
3601 h = ph & (x2a->size-1);
3604 if( strcmp(np->key,key)==0 ){
3605 /* An existing entry with the same key is found. */
3606 /* Fail because overwrite is not allows. */
3611 if( x2a->count>=x2a->size ){
3612 /* Need to make the hash table bigger */
3615 array.size = size = x2a->size*2;
3616 array.count = x2a->count;
3617 array.tbl = (x2node*)malloc(
3618 (sizeof(x2node) + sizeof(x2node*))*size );
3619 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
3620 array.ht = (x2node**)&(array.tbl[size]);
3621 for(i=0; i<size; i++) array.ht[i] = 0;
3622 for(i=0; i<x2a->count; i++){
3623 x2node *oldnp, *newnp;
3624 oldnp = &(x2a->tbl[i]);
3625 h = strhash(oldnp->key) & (size-1);
3626 newnp = &(array.tbl[i]);
3627 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
3628 newnp->next = array.ht[h];
3629 newnp->key = oldnp->key;
3630 newnp->data = oldnp->data;
3631 newnp->from = &(array.ht[h]);
3632 array.ht[h] = newnp;
3637 /* Insert the new data */
3638 h = ph & (x2a->size-1);
3639 np = &(x2a->tbl[x2a->count++]);
3642 if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next);
3643 np->next = x2a->ht[h];
3645 np->from = &(x2a->ht[h]);
3649 /* Return a pointer to data assigned to the given key. Return NULL
3650 ** if no such key. */
3651 struct symbol *Symbol_find(char *key)
3656 if( x2a==0 ) return 0;
3657 h = strhash(key) & (x2a->size-1);
3660 if( strcmp(np->key,key)==0 ) break;
3663 return np ? np->data : 0;
3666 /* Return the n-th data. Return NULL if n is out of range. */
3667 struct symbol *Symbol_Nth(int n)
3669 struct symbol *data;
3670 if( x2a && n>0 && n<=x2a->count ){
3671 data = x2a->tbl[n-1].data;
3678 /* Return the size of the array */
3679 int Symbol_count(void)
3681 return x2a ? x2a->count : 0;
3684 /* Return an array of pointers to all data in the table.
3685 ** The array is obtained from malloc. Return NULL if memory allocation
3686 ** problems, or if the array is empty. */
3687 struct symbol **Symbol_arrayof(void)
3689 struct symbol **array;
3691 if( x2a==0 ) return 0;
3693 array = (struct symbol **)malloc( sizeof(struct symbol *)*size );
3695 for(i=0; i<size; i++) array[i] = x2a->tbl[i].data;
3700 /* Compare two configurations */
3701 int Configcmp(const void *a_arg, const void *b_arg)
3703 const struct config *a = a_arg, *b = b_arg;
3705 x = a->rp->index - b->rp->index;
3706 if( x==0 ) x = a->dot - b->dot;
3710 /* Compare two states */
3711 PRIVATE int statecmp(struct config *a, struct config *b)
3714 for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){
3715 rc = a->rp->index - b->rp->index;
3716 if( rc==0 ) rc = a->dot - b->dot;
3726 PRIVATE int statehash(struct config *a)
3730 h = h*571 + a->rp->index*37 + a->dot;
3736 /* Allocate a new state structure */
3737 struct state *State_new(void)
3740 new = (struct state *)malloc( sizeof(struct state) );
3745 /* There is one instance of the following structure for each
3746 ** associative array of type "x3".
3749 int size; /* The number of available slots. */
3750 /* Must be a power of 2 greater than or */
3752 int count; /* Number of currently slots filled */
3753 struct s_x3node *tbl; /* The data stored here */
3754 struct s_x3node **ht; /* Hash table for lookups */
3757 /* There is one instance of this structure for every data element
3758 ** in an associative array of type "x3".
3760 typedef struct s_x3node {
3761 struct state *data; /* The data */
3762 struct config *key; /* The key */
3763 struct s_x3node *next; /* Next entry with the same hash */
3764 struct s_x3node **from; /* Previous link */
3767 /* There is only one instance of the array, which is the following */
3768 static struct s_x3 *x3a;
3770 /* Allocate a new associative array */
3771 void State_init(void){
3773 x3a = (struct s_x3*)malloc( sizeof(struct s_x3) );
3777 x3a->tbl = (x3node*)malloc(
3778 (sizeof(x3node) + sizeof(x3node*))*128 );
3784 x3a->ht = (x3node**)&(x3a->tbl[128]);
3785 for(i=0; i<128; i++) x3a->ht[i] = 0;
3789 /* Insert a new record into the array. Return TRUE if successful.
3790 ** Prior data with the same key is NOT overwritten */
3791 int State_insert(struct state *data, struct config *key)
3797 if( x3a==0 ) return 0;
3798 ph = statehash(key);
3799 h = ph & (x3a->size-1);
3802 if( statecmp(np->key,key)==0 ){
3803 /* An existing entry with the same key is found. */
3804 /* Fail because overwrite is not allows. */
3809 if( x3a->count>=x3a->size ){
3810 /* Need to make the hash table bigger */
3813 array.size = size = x3a->size*2;
3814 array.count = x3a->count;
3815 array.tbl = (x3node*)malloc(
3816 (sizeof(x3node) + sizeof(x3node*))*size );
3817 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
3818 array.ht = (x3node**)&(array.tbl[size]);
3819 for(i=0; i<size; i++) array.ht[i] = 0;
3820 for(i=0; i<x3a->count; i++){
3821 x3node *oldnp, *newnp;
3822 oldnp = &(x3a->tbl[i]);
3823 h = statehash(oldnp->key) & (size-1);
3824 newnp = &(array.tbl[i]);
3825 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
3826 newnp->next = array.ht[h];
3827 newnp->key = oldnp->key;
3828 newnp->data = oldnp->data;
3829 newnp->from = &(array.ht[h]);
3830 array.ht[h] = newnp;
3835 /* Insert the new data */
3836 h = ph & (x3a->size-1);
3837 np = &(x3a->tbl[x3a->count++]);
3840 if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next);
3841 np->next = x3a->ht[h];
3843 np->from = &(x3a->ht[h]);
3847 /* Return a pointer to data assigned to the given key. Return NULL
3848 ** if no such key. */
3849 struct state *State_find(struct config *key)
3854 if( x3a==0 ) return 0;
3855 h = statehash(key) & (x3a->size-1);
3858 if( statecmp(np->key,key)==0 ) break;
3861 return np ? np->data : 0;
3864 /* Return an array of pointers to all data in the table.
3865 ** The array is obtained from malloc. Return NULL if memory allocation
3866 ** problems, or if the array is empty. */
3867 struct state **State_arrayof(void)
3869 struct state **array;
3871 if( x3a==0 ) return 0;
3873 array = (struct state **)malloc( sizeof(struct state *)*size );
3875 for(i=0; i<size; i++) array[i] = x3a->tbl[i].data;
3880 /* Hash a configuration */
3881 PRIVATE int confighash(struct config *a)
3884 h = h*571 + a->rp->index*37 + a->dot;
3888 /* There is one instance of the following structure for each
3889 ** associative array of type "x4".
3892 int size; /* The number of available slots. */
3893 /* Must be a power of 2 greater than or */
3895 int count; /* Number of currently slots filled */
3896 struct s_x4node *tbl; /* The data stored here */
3897 struct s_x4node **ht; /* Hash table for lookups */
3900 /* There is one instance of this structure for every data element
3901 ** in an associative array of type "x4".
3903 typedef struct s_x4node {
3904 struct config *data; /* The data */
3905 struct s_x4node *next; /* Next entry with the same hash */
3906 struct s_x4node **from; /* Previous link */
3909 /* There is only one instance of the array, which is the following */
3910 static struct s_x4 *x4a;
3912 /* Allocate a new associative array */
3913 void Configtable_init(void){
3915 x4a = (struct s_x4*)malloc( sizeof(struct s_x4) );
3919 x4a->tbl = (x4node*)malloc(
3920 (sizeof(x4node) + sizeof(x4node*))*64 );
3926 x4a->ht = (x4node**)&(x4a->tbl[64]);
3927 for(i=0; i<64; i++) x4a->ht[i] = 0;
3931 /* Insert a new record into the array. Return TRUE if successful.
3932 ** Prior data with the same key is NOT overwritten */
3933 int Configtable_insert(struct config *data)
3939 if( x4a==0 ) return 0;
3940 ph = confighash(data);
3941 h = ph & (x4a->size-1);
3944 if( Configcmp(np->data,data)==0 ){
3945 /* An existing entry with the same key is found. */
3946 /* Fail because overwrite is not allows. */
3951 if( x4a->count>=x4a->size ){
3952 /* Need to make the hash table bigger */
3955 array.size = size = x4a->size*2;
3956 array.count = x4a->count;
3957 array.tbl = (x4node*)malloc(
3958 (sizeof(x4node) + sizeof(x4node*))*size );
3959 if( array.tbl==0 ) return 0; /* Fail due to malloc failure */
3960 array.ht = (x4node**)&(array.tbl[size]);
3961 for(i=0; i<size; i++) array.ht[i] = 0;
3962 for(i=0; i<x4a->count; i++){
3963 x4node *oldnp, *newnp;
3964 oldnp = &(x4a->tbl[i]);
3965 h = confighash(oldnp->data) & (size-1);
3966 newnp = &(array.tbl[i]);
3967 if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
3968 newnp->next = array.ht[h];
3969 newnp->data = oldnp->data;
3970 newnp->from = &(array.ht[h]);
3971 array.ht[h] = newnp;
3976 /* Insert the new data */
3977 h = ph & (x4a->size-1);
3978 np = &(x4a->tbl[x4a->count++]);
3980 if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next);
3981 np->next = x4a->ht[h];
3983 np->from = &(x4a->ht[h]);
3987 /* Return a pointer to data assigned to the given key. Return NULL
3988 ** if no such key. */
3989 struct config *Configtable_find(struct config *key)
3994 if( x4a==0 ) return 0;
3995 h = confighash(key) & (x4a->size-1);
3998 if( Configcmp(np->data,key)==0 ) break;
4001 return np ? np->data : 0;
4004 /* Remove all data from the table. Pass each data to the function "f"
4005 ** as it is removed. ("f" may be null to avoid this step.) */
4006 void Configtable_clear(int(*f)(struct config *))
4009 if( x4a==0 || x4a->count==0 ) return;
4010 if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data);
4011 for(i=0; i<x4a->size; i++) x4a->ht[i] = 0;