Fix typos.
[obnox/wireshark/wip.git] / epan / dtd_parse.l
1 /*
2  * We want to stop processing when we get to the end of the input.
3  */
4 %option noyywrap
5
6 /*
7  * We don't use unput, so don't generate code for it.
8  */
9 %option nounput 
10
11 /*
12  * We don't read from the terminal.
13  */
14 %option never-interactive
15
16 /*
17  * Prefix scanner routines with "Dtd_Parse_" rather than "yy", so this scanner
18  * can coexist with other scanners.
19  */
20 %option prefix="Dtd_Parse_"
21
22 %option outfile="dtd_parse.c"
23
24 %{
25
26         /* dtd_parse.l
27         * an XML dissector for Wireshark 
28         * lexical analyzer for DTDs
29         *
30         * Copyright 2004, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
31         *
32         * $Id$
33         *
34         * Wireshark - Network traffic analyzer
35         * By Gerald Combs <gerald@wireshark.org>
36         * Copyright 1998 Gerald Combs
37         *
38         * This program is free software; you can redistribute it and/or
39         * modify it under the terms of the GNU General Public License
40         * as published by the Free Software Foundation; either version 2
41         * of the License, or (at your option) any later version.
42         * 
43         * This program is distributed in the hope that it will be useful,
44         * but WITHOUT ANY WARRANTY; without even the implied warranty of
45         * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
46         * GNU General Public License for more details.
47         * 
48         * You should have received a copy of the GNU General Public License
49         * along with this program; if not, write to the Free Software
50         * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
51         */
52         
53 #include <glib.h>
54 #include <string.h>
55         
56 #include "dtd.h"
57 #include "dtd_grammar.h"
58 #include "dtd_parse.h"
59 #include "dtd_parse_lex.h"
60         
61 #ifdef NEED_G_ASCII_STRCASECMP_H
62 #include "g_ascii_strcasecmp.h"
63 #endif
64
65         struct _proto_xmlpi_attr {
66                 gchar* name;
67                 void (*act)(gchar*);
68         };
69
70         static void* pParser;
71         static GString* input_string;   
72         static guint offset;
73         static guint len;
74         static gchar* location;
75         static gchar* attr_name;
76         
77         static int my_yyinput(char* buff,guint size);
78         
79         static dtd_token_data_t* new_token(gchar*);
80
81         static dtd_build_data_t* build_data;
82         
83         static void set_proto_name (gchar* val) { if(build_data->proto_name) g_free(build_data->proto_name); build_data->proto_name = g_strdup(val); }
84         static void set_media_type (gchar* val) { if(build_data->media_type) g_free(build_data->media_type); build_data->media_type = g_strdup(val); }
85         static void set_proto_root (gchar* val) { if(build_data->proto_root) g_free(build_data->proto_root); build_data->proto_root = g_strdup(val); }
86         static void set_description (gchar* val) { if(build_data->description) g_free(build_data->description); build_data->description = g_strdup(val); }
87         static void set_recursive (gchar* val) { build_data->recursion = ( g_ascii_strcasecmp(val,"yes") == 0 ) ? TRUE : FALSE; }
88
89         static struct _proto_xmlpi_attr proto_attrs[] =
90         {
91                 { "proto_name", set_proto_name },
92                 { "media", set_media_type },
93                 { "root", set_proto_root },
94                 { "description", set_description },
95                 { "hierarchy", set_recursive },
96                 {NULL,NULL}
97         };
98         
/*
 * Optional token trace, enabled by defining DEBUG_DTD_PARSER.
 *
 * The macro takes the token type as an argument: a parameter of DTD_PARSE
 * is not substituted inside another macro's expansion, so an object-like
 * DEBUG_DTD_TOKEN naming "token_type" would refer to an undeclared
 * identifier in debug builds.
 */
#ifdef DEBUG_DTD_PARSER
#define DEBUG_DTD_TOKEN(token_type) \
        fprintf(stderr,"->%s (%i)%s\n", \
                location ? location : "(null)",(token_type),yytext)
#else
#define DEBUG_DTD_TOKEN(token_type) ((void)0)
#endif

/*
 * Hands the current lexeme to the parser as a token of the given type and
 * stops scanning as soon as any error text has been recorded in
 * build_data->error.  Wrapped in do { } while (0) so that it behaves as a
 * single statement; follow each use with a semicolon.
 */
#define DTD_PARSE(token_type) \
        do { \
                DEBUG_DTD_TOKEN(token_type); \
                DtdParse(pParser, (token_type), new_token(yytext), build_data); \
                if(build_data->error->len > 0) yyterminate(); \
        } while (0)

/* The scanner reads from the in-memory string through my_yyinput(). */
#define YY_INPUT(buff,result,max_size) ( (result) = my_yyinput((buff),(max_size)) )
113
114 %}
115
/* XML comment delimiters. */
comment_start "<!--"
comment_stop "-->"

/* Processing instructions and the two Wireshark-specific targets. */
start_xmlpi "<?"

location_xmlpi "wireshark:location"
protocol_xmlpi "wireshark:protocol"

/*
 * POSIX class names such as [:blank:] are only recognised inside a bracket
 * expression; written bare they form a set of the literal characters
 * ':', 'b', 'l', 'a', 'n' and 'k'.  Hence the doubled brackets below.
 */
get_attr_quote =[[:blank:]]*["]
avoid_editor_bug ["]

get_location_xmlpi  [^[:blank:]]+

stop_xmlpi "?>"

notation_tag       "<!"[[:blank:]]*NOTATION

/* Markup declaration delimiters and keywords. */
special_start  "<!"
special_stop   ">"
whitespace     [[:blank:]\r\n]+
newline        \n
attlist_kw     ATTLIST
doctype_kw     DOCTYPE
element_kw     ELEMENT

/* Element content keywords. */
pcdata         #PCDATA
any            ANY
cdata          #CDATA

/* Attribute types. */
iD             ID
idref          IDREF
idrefs         IDREFS
nmtoken        NMTOKEN
nmtokens       NMTOKENS
entity         ENTITY
entities       ENTITIES
notation       NOTATION
cdata_t        CDATA

/* EMPTY content and attribute default keywords. */
empty          EMPTY
defaulT        #DEFAULT
fixed          #FIXED
required       #REQUIRED
implied        #IMPLIED

/* Punctuation. */
star           "*"
question       "?"
plus           "+"
open_parens    "("
close_parens   ")"
open_bracket   "["
close_bracket  "]"
comma          ","
pipe           "|"
dquote         ["]

/* Names and quoted strings. */
name           [A-Za-z0-9][-a-zA-Z0-9_]*
dquoted        ["][^\"]*["]
squoted        ['][^\']*[']

%START DTD XMLPI LOCATION DONE PROTOCOL GET_ATTR_QUOTE GET_ATTR_VAL GET_ATTR_CLOSE_QUOTE IN_COMMENT IN_NOTATION 
177 %%
178
{whitespace}                        ;

    /* Comments: discard everything up to the closing delimiter. */
<DTD>{comment_start}                { BEGIN IN_COMMENT; }
<IN_COMMENT>[^-]?                   |
<IN_COMMENT>[-]                     ;
<IN_COMMENT>{comment_stop}          { BEGIN DTD; }

    /* NOTATION declarations: discard everything up to the closing '>'. */
<DTD>{notation_tag}                 { BEGIN IN_NOTATION; }
<IN_NOTATION>[^>]                   ;
<IN_NOTATION>{special_stop}         { BEGIN DTD; }
190
<DTD>{start_xmlpi}                  { BEGIN XMLPI; }

    /* Only the two Wireshark-specific processing instructions are acted on. */
<XMLPI>{location_xmlpi}             { BEGIN LOCATION; }
<XMLPI>{protocol_xmlpi}             { BEGIN PROTOCOL; }

    /* NOTE(review): the pattern below is written as "<.>"; it may have been
     * meant as a plain "." to skip the body of other processing
     * instructions.  Kept exactly as found - confirm before changing. */
<XMLPI><.> ;
<XMLPI>{stop_xmlpi}                 { BEGIN DTD; }

<LOCATION>{get_location_xmlpi} {
    /* Remember the location text; it is copied into every later token. */
    g_free(location);               /* g_free() accepts NULL */
    location = g_strdup(yytext);
    BEGIN DONE;
}

<DONE>{stop_xmlpi}                  { BEGIN DTD; }
213
<PROTOCOL>{name} {
    /*
     * Start of an attribute inside wireshark:protocol: keep a lower-cased
     * copy of its name until the value has been scanned.  A name left over
     * from an attribute that was never completed is released first.
     */
    g_free(attr_name);
    attr_name = g_strdup(yytext);
    g_strdown(attr_name);
    BEGIN GET_ATTR_QUOTE;
}

<GET_ATTR_QUOTE>{get_attr_quote} { BEGIN GET_ATTR_VAL; }

<GET_ATTR_QUOTE>. {
    /* Anything other than =" after the name is an error; stop scanning.
     * location is NULL when no wireshark:location has been seen, and a
     * NULL pointer must not be passed to %s. */
    g_string_sprintfa(build_data->error,
                      "error in wireshark:protocol xmpli at %s : could not find attribute value!",
                      location ? location : "(null)");
    g_free(attr_name);
    attr_name = NULL;
    yyterminate();
}

<GET_ATTR_VAL>[^"]+ {
    /*"*/
    struct _proto_xmlpi_attr* pa;

    /* Find the handler for this attribute; pa stops on the NULL-named
     * terminator entry when the name is unknown. */
    for(pa = proto_attrs; pa->name; pa++) {
        if (g_ascii_strcasecmp(attr_name,pa->name) == 0)
            break;
    }

    if (pa->name == NULL) {
        g_string_sprintfa(build_data->error,
                          "error in wireshark:protocol xmpli at %s : no such parameter %s!",
                          location ? location : "(null)", attr_name);
        g_free(attr_name);
        attr_name = NULL;
        yyterminate();
    }

    pa->act(yytext);

    /* The name is no longer needed; clear the pointer so that it cannot be
     * freed a second time. */
    g_free(attr_name);
    attr_name = NULL;

    BEGIN GET_ATTR_CLOSE_QUOTE;
}

<GET_ATTR_CLOSE_QUOTE>{dquote} { BEGIN PROTOCOL;}

<PROTOCOL>{stop_xmlpi} BEGIN DTD;
258
<DTD>{special_start}    { DTD_PARSE(TOKEN_TAG_START); }
<DTD>{special_stop}     { DTD_PARSE(TOKEN_TAG_STOP); }

    /* Declaration keywords. */
<DTD>{attlist_kw}       { DTD_PARSE(TOKEN_ATTLIST_KW); }
<DTD>{element_kw}       { DTD_PARSE(TOKEN_ELEMENT_KW); }
<DTD>{doctype_kw}       { DTD_PARSE(TOKEN_DOCTYPE_KW); }

    /* Element content keywords. */
<DTD>{pcdata}           { DTD_PARSE(TOKEN_ELEM_DATA); }
<DTD>{any}              { DTD_PARSE(TOKEN_ELEM_DATA); }
<DTD>{cdata}            { DTD_PARSE(TOKEN_ELEM_DATA); }
<DTD>{empty}            { DTD_PARSE(TOKEN_EMPTY_KW); }

    /* Attribute types. */
<DTD>{iD}               { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{idref}            { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{idrefs}           { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{nmtoken}          { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{nmtokens}         { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{entity}           { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{entities}         { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{notation}         { DTD_PARSE(TOKEN_ATT_TYPE); }
<DTD>{cdata_t}          { DTD_PARSE(TOKEN_ATT_TYPE); }

    /* Attribute defaults: #DEFAULT and #FIXED share one token type,
     * #REQUIRED and #IMPLIED another. */
<DTD>{defaulT}          { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
<DTD>{fixed}            { DTD_PARSE(TOKEN_ATT_DEF_WITH_VALUE); }
<DTD>{required}         { DTD_PARSE(TOKEN_ATT_DEF); }
<DTD>{implied}          { DTD_PARSE(TOKEN_ATT_DEF); }

    /* Punctuation. */
<DTD>{star}             { DTD_PARSE(TOKEN_STAR); }
<DTD>{question}         { DTD_PARSE(TOKEN_QUESTION); }
<DTD>{plus}             { DTD_PARSE(TOKEN_PLUS); }
<DTD>{comma}            { DTD_PARSE(TOKEN_COMMA); }
<DTD>{open_parens}      { DTD_PARSE(TOKEN_OPEN_PARENS); }
<DTD>{close_parens}     { DTD_PARSE(TOKEN_CLOSE_PARENS); }
<DTD>{open_bracket}     { DTD_PARSE(TOKEN_OPEN_BRACKET); }
<DTD>{close_bracket}    { DTD_PARSE(TOKEN_CLOSE_BRACKET); }
<DTD>{pipe}             { DTD_PARSE(TOKEN_PIPE); }

    /* Quoted strings (either quote style) and plain names. */
<DTD>{dquoted}          |
<DTD>{squoted}          { DTD_PARSE(TOKEN_QUOTED); }
<DTD>{name}             { DTD_PARSE(TOKEN_NAME); }
298
299 %%
300
301 static dtd_token_data_t* new_token(gchar* text) {
302         dtd_token_data_t* t = g_malloc(sizeof(dtd_token_data_t));
303         
304         t->text = g_strdup(text);
305         t->location = g_strdup(location);
306         
307         return t;
308 }
309
310
311 static int my_yyinput(char* buff, guint size) {
312
313         if (offset >= len ) {
314                 return YY_NULL;
315         } else if ( offset + size <= len ) {
316                 memcpy(buff, input_string->str + offset,size);
317                 offset += size;
318                 return size;
319         } else {
320                 size = len - offset;
321                 memcpy(buff, input_string->str + offset,size);
322                 offset = len;
323                 return size;
324         }
325 }
326
327 extern dtd_build_data_t* dtd_parse(GString* s) {
328
329         input_string = s;
330         offset = 0;
331         len = input_string->len;
332         
333         pParser = DtdParseAlloc(g_malloc);
334
335 #ifdef DEBUG_DTD_PARSER
336         DtdParseTrace(stderr, ">>");
337 #endif
338     
339         build_data = g_malloc(sizeof(dtd_build_data_t));
340
341         build_data->proto_name = NULL;
342         build_data->media_type = NULL;
343         build_data->description = NULL;
344         build_data->proto_root = NULL;
345         build_data->recursion = FALSE;
346     
347         build_data->elements = g_ptr_array_new();
348         build_data->attributes = g_ptr_array_new();
349
350         build_data->error = g_string_new("");
351         
352         location = NULL;
353     
354         BEGIN DTD;
355         
356         yylex();
357
358         DtdParse(pParser, 0, NULL,build_data);
359
360         yyrestart(NULL);
361         
362         if (location) g_free(location);
363         
364         location = NULL;
365     
366         DtdParseFree(pParser, g_free );
367         
368         return build_data;
369 }